<?php
declare(strict_types = 1);

namespace TYPO3\CMS\Core\Database\Schema\Parser;

/*
 * This file is part of the TYPO3 CMS project.
 *
 * It is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, either version 2
 * of the License, or any later version.
 *
 * For the full copyright and license information, please read the
 * LICENSE.txt file that was distributed with this source code.
 *
 * The TYPO3 project - inspiring people to share!
 */

/**
 * Scans a MySQL CREATE TABLE statement for tokens.
 *
 * The token type constants are grouped into numeric ranges (see the comments
 * above each group). getType() relies on these ranges: a bare word is only
 * reported as a data type or keyword token if a matching T_* constant with a
 * value greater than 100 exists.
 */
class Lexer extends \Doctrine\Common\Lexer\AbstractLexer
{
    // All tokens that are not valid identifiers must be < 100
    const T_NONE = 1;
    const T_STRING = 2;
    const T_INPUT_PARAMETER = 3;
    const T_CLOSE_PARENTHESIS = 4;
    const T_OPEN_PARENTHESIS = 5;
    const T_COMMA = 6;
    const T_DIVIDE = 7;
    const T_DOT = 8;
    const T_EQUALS = 9;
    const T_GREATER_THAN = 10;
    const T_LOWER_THAN = 11;
    const T_MINUS = 12;
    const T_MULTIPLY = 13;
    const T_NEGATE = 14;
    const T_PLUS = 15;
    const T_OPEN_CURLY_BRACE = 16;
    const T_CLOSE_CURLY_BRACE = 17;
    const T_SEMICOLON = 18;

    // All tokens that are identifiers or keywords that could be considered as identifiers should be >= 100
    const T_IDENTIFIER = 100;

    // All tokens that could be considered as a data type should be >= 200
    const T_BIT = 201;
    const T_TINYINT = 202;
    const T_SMALLINT = 203;
    const T_MEDIUMINT = 204;
    const T_INT = 205;
    const T_INTEGER = 206;
    const T_BIGINT = 207;
    const T_REAL = 208;
    const T_DOUBLE = 209;
    const T_FLOAT = 210;
    const T_DECIMAL = 211;
    const T_NUMERIC = 212;
    const T_DATE = 213;
    const T_TIME = 214;
    const T_TIMESTAMP = 215;
    const T_DATETIME = 216;
    const T_YEAR = 217;
    const T_CHAR = 218;
    const T_VARCHAR = 219;
    const T_BINARY = 220;
    const T_VARBINARY = 221;
    const T_TINYBLOB = 222;
    const T_BLOB = 223;
    const T_MEDIUMBLOB = 224;
    const T_LONGBLOB = 225;
    const T_TINYTEXT = 226;
    const T_TEXT = 227;
    const T_MEDIUMTEXT = 228;
    const T_LONGTEXT = 229;
    const T_ENUM = 230;
    const T_SET = 231;
    const T_JSON = 232;

    // All keyword tokens should be >= 300
    const T_CREATE = 300;
    const T_TEMPORARY = 301;
    const T_TABLE = 302;
    const T_IF = 303;
    const T_NOT = 304;
    const T_EXISTS = 305;
    const T_CONSTRAINT = 306;
    const T_INDEX = 307;
    const T_KEY = 308;
    const T_FULLTEXT = 309;
    const T_SPATIAL = 310;
    const T_PRIMARY = 311;
    const T_UNIQUE = 312;
    const T_CHECK = 313;
    const T_DEFAULT = 314;
    const T_AUTO_INCREMENT = 315;
    const T_COMMENT = 316;
    const T_COLUMN_FORMAT = 317;
    const T_STORAGE = 318;
    const T_REFERENCES = 319;
    const T_NULL = 320;
    const T_FIXED = 321;
    const T_DYNAMIC = 322;
    const T_MEMORY = 323;
    const T_DISK = 324;
    const T_UNSIGNED = 325;
    const T_ZEROFILL = 326;
    const T_CURRENT_TIMESTAMP = 327;
    const T_CHARACTER = 328;
    const T_COLLATE = 329;
    const T_ASC = 330;
    const T_DESC = 331;
    const T_MATCH = 332;
    const T_FULL = 333;
    const T_PARTIAL = 334;
    const T_SIMPLE = 335;
    const T_ON = 336;
    const T_UPDATE = 337;
    const T_DELETE = 338;
    const T_RESTRICT = 339;
    const T_CASCADE = 340;
    const T_NO = 341;
    const T_ACTION = 342;
    const T_USING = 343;
    const T_BTREE = 344;
    const T_HASH = 345;
    const T_KEY_BLOCK_SIZE = 346;
    const T_WITH = 347;
    const T_PARSER = 348;
    const T_FOREIGN = 349;
    const T_ENGINE = 350;
    const T_AVG_ROW_LENGTH = 351;
    const T_CHECKSUM = 352;
    const T_COMPRESSION = 353;
    const T_CONNECTION = 354;
    const T_DATA = 355;
    const T_DIRECTORY = 356;
    const T_DELAY_KEY_WRITE = 357;
    const T_ENCRYPTION = 358;
    const T_INSERT_METHOD = 359;
    const T_MAX_ROWS = 360;
    const T_MIN_ROWS = 361;
    const T_PACK_KEYS = 362;
    const T_PASSWORD = 363;
    const T_ROW_FORMAT = 364;
    const T_STATS_AUTO_RECALC = 365;
    const T_STATS_PERSISTENT = 366;
    const T_STATS_SAMPLE_PAGES = 367;
    const T_TABLESPACE = 368;
    const T_UNION = 369;
    const T_PRECISION = 370;

    /**
     * Creates a new statement scanner object.
     *
     * The statement is handed to setInput() right away.
     *
     * @param string $input A statement string.
     */
    public function __construct($input)
    {
        $this->setInput($input);
    }

    /**
     * Lexical catchable patterns.
     *
     * Regular expression fragments (without delimiters) describing the
     * pieces of input that should become tokens.
     *
     * NOTE(review): the identifier pattern only lists lower case letters
     * while getType() upper-cases values, so the parent lexer presumably
     * applies these patterns case-insensitively - confirm against
     * AbstractLexer.
     *
     * @return array
     */
    protected function getCatchablePatterns(): array
    {
        return [
            '(?:-?[0-9]+(?:[\.][0-9]+)*)(?:e[+-]?[0-9]+)?', // numbers
            '`(?:[^`]|``)*`', // quoted identifiers
            "'(?:[^']|'')*'", // quoted strings
            '\)', // closing parenthesis
            '[a-z0-9$_][\w$]*', // unquoted identifiers
        ];
    }

    /**
     * Lexical non-catchable patterns.
     *
     * Only runs of whitespace are listed here.
     *
     * @return array
     */
    protected function getNonCatchablePatterns(): array
    {
        return ['\s+'];
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * The value is passed by reference: for quoted strings and quoted
     * identifiers the surrounding quote characters are removed and doubled
     * quote characters inside are collapsed into a single one.
     *
     * Numeric values are reported as T_FLOAT (when they contain a dot or an
     * exponent) or T_INTEGER. Bare words are reported as the matching data
     * type / keyword token, or as T_STRING if no such token exists. Anything
     * that is not recognized yields T_NONE.
     *
     * @param string $value
     * @return int One of the T_* constants of this class.
     */
    protected function getType(&$value): int
    {
        // Recognize numeric values
        if (is_numeric($value)) {
            if (strpos($value, '.') !== false || stripos($value, 'e') !== false) {
                return self::T_FLOAT;
            }

            return self::T_INTEGER;
        }

        // Recognize quoted strings
        if ($value[0] === "'") {
            $value = str_replace("''", "'", substr($value, 1, -1));

            return self::T_STRING;
        }

        // Recognize quoted identifiers
        if ($value[0] === '`') {
            $value = str_replace('``', '`', substr($value, 1, -1));

            return self::T_IDENTIFIER;
        }

        // Recognize identifiers, aliased or qualified names
        if (ctype_alpha($value[0])) {
            // Derive the constant name from the class itself instead of a
            // hard-coded string, so a rename or namespace move cannot
            // silently break the keyword lookup.
            $name = self::class . '::T_' . strtoupper($value);

            if (defined($name)) {
                $type = constant($name);

                // Only data type and keyword tokens may be produced from a
                // bare word; T_IDENTIFIER (100) and everything below it is
                // deliberately excluded.
                if ($type > 100) {
                    return $type;
                }
            }

            return self::T_STRING;
        }

        switch ($value) {
            // Recognize symbols
            case '.':
                return self::T_DOT;
            case ';':
                return self::T_SEMICOLON;
            case ',':
                return self::T_COMMA;
            case '(':
                return self::T_OPEN_PARENTHESIS;
            case ')':
                return self::T_CLOSE_PARENTHESIS;
            case '=':
                return self::T_EQUALS;
            case '>':
                return self::T_GREATER_THAN;
            case '<':
                return self::T_LOWER_THAN;
            case '+':
                return self::T_PLUS;
            case '-':
                return self::T_MINUS;
            case '*':
                return self::T_MULTIPLY;
            case '/':
                return self::T_DIVIDE;
            case '!':
                return self::T_NEGATE;
            case '{':
                return self::T_OPEN_CURLY_BRACE;
            case '}':
                return self::T_CLOSE_CURLY_BRACE;
        }

        // Nothing matched
        return self::T_NONE;
    }
}