<?php
/*
 *  $Id$
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This software consists of voluntary contributions made by many individuals
 * and is licensed under the LGPL. For more information, see
 * <http://www.doctrine-project.org>.
 */

/**
 * Doctrine_Query_Tokenizer
 *
 * Splits DQL/SQL strings into parts while keeping quoted strings and
 * bracketed sub-expressions intact.
 *
 * @package     Doctrine
 * @subpackage  Query
 * @license     http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @link        www.doctrine-project.org
 * @since       1.0
 * @version     $Revision$
 * @author      Konsta Vesterinen <kvesteri@cc.hut.fi>
 * @author      Guilherme Blanco <guilhermeblanco@hotmail.com>
 * @author      Stefan Klug <stefan.klug@googlemail.com>
 */
class Doctrine_Query_Tokenizer
{

    /**
     * Splits the given dql query into an array where keys are the query part
     * keywords exactly as they were written in the query (case is preserved)
     * and values are the space-joined tokens belonging to that part. Every
     * appended token is followed by a single space, so non-empty values end
     * with a trailing space.
     *
     * A standalone "by" token is always dropped; "order" and "group" only
     * start a new part when the next token is "by", otherwise they are
     * treated as ordinary tokens of the current part.
     *
     * example:
     *
     * parameter:
     *     $query = "SELECT u.* FROM User u WHERE u.name LIKE ?"
     * returns:
     *     array(
     *         'SELECT' => 'u.* ',
     *         'FROM'   => 'User u ',
     *         'WHERE'  => 'u.name LIKE ? '
     *     );
     *
     * @param string $query             DQL query
     *
     * @throws Doctrine_Query_Tokenizer_Exception  If a token is encountered
     *                                             before any query part
     *                                             keyword
     *
     * @return array                    An array containing the query string parts
     */
    public function tokenizeQuery($query)
    {
        $tokens = $this->sqlExplode($query, ' ');
        $parts  = array();
        $p      = null; // keyword of the part currently being filled

        foreach ($tokens as $index => $token) {
            $token = trim($token);

            switch (strtolower($token)) {
                case 'delete':
                case 'update':
                case 'select':
                case 'set':
                case 'from':
                case 'where':
                case 'limit':
                case 'offset':
                case 'having':
                    $p = $token;
                    $parts[$token] = '';
                    break;

                case 'order':
                case 'group':
                    $i = ($index + 1);
                    if (isset($tokens[$i]) && strtolower($tokens[$i]) === 'by') {
                        $p = $token;
                        $parts[$token] = '';
                    } else {
                        // Not "ORDER BY"/"GROUP BY": an ordinary token, which
                        // needs an open part just like the default case.
                        if ($p === null) {
                            throw new Doctrine_Query_Tokenizer_Exception(
                                "Couldn't tokenize query. Encountered invalid token: '$token'."
                            );
                        }

                        $parts[$p] .= "$token ";
                    }
                    break;

                case 'by':
                    break;

                default:
                    if ($p === null) {
                        throw new Doctrine_Query_Tokenizer_Exception(
                            "Couldn't tokenize query. Encountered invalid token: '$token'."
                        );
                    }

                    $parts[$p] .= "$token ";
            }
        }

        return $parts;
    }

    /**
     * Trims brackets from string
     *
     * The brackets are only removed when the string both starts with $e1
     * and ends with $e2; otherwise the string is returned unchanged. Only
     * the first and last single character are compared.
     *
     * @param string $str String to remove the brackets
     * @param string $e1  First bracket, usually '('
     * @param string $e2  Second bracket, usually ')'
     *
     * @return string
     */
    public function bracketTrim($str, $e1 = '(', $e2 = ')')
    {
        if (substr($str, 0, 1) === $e1 && substr($str, -1) === $e2) {
            return substr($str, 1, -1);
        }

        return $str;
    }

    /**
     * Explodes a sql expression respecting bracket placement.
     *
     * This method transform a sql expression in an array of simple clauses,
     * while observing the parentheses precedence. Delimiters are matched
     * case insensitively and are not part of the result.
     *
     * Note: bracketExplode always trims the returned pieces
     *
     * <code>
     * $str = (age < 20 AND age > 18) AND email LIKE 'John@example.com'
     * $clauses = $tokenizer->bracketExplode($str, ' AND ', '(', ')');
     * // array("(age < 20 AND age > 18)", "email LIKE 'John@example.com'")
     * </code>
     *
     * @param string       $str String to be bracket exploded
     * @param string|array $d   Delimeter(s) which explode the string
     * @param string       $e1  First bracket, usually '('
     * @param string       $e2  Second bracket, usually ')'
     *
     * @return array
     */
    public function bracketExplode($str, $d = ' ', $e1 = '(', $e2 = ')')
    {
        if (is_string($d)) {
            $d = array($d);
        }

        // Bracket explode has to be case insensitive
        $regexp = $this->getSplitRegExpFromArray($d) . 'i';
        $terms  = $this->clauseExplodeRegExp($str, $regexp, $e1, $e2);

        $res = array();

        // Trim is here for historical reasons
        foreach ($terms as $value) {
            $res[] = trim($value[0]);
        }

        return $res;
    }

    /**
     * Explode quotes from string
     *
     * Splits the string by the given delimiter(s), case insensitively,
     * without splitting inside quoted strings. Unlike bracketExplode, terms
     * with unbalanced brackets are NOT merged back together. The delimiters
     * themselves are not part of the result.
     *
     * Note: quoteExplode always trims the returned pieces
     *
     * example:
     *
     * parameters:
     *     $str = email LIKE 'John@example.com'
     *     $d = ' LIKE '
     *
     * would return an array:
     *     array("email", "'John@example.com'")
     *
     * @param string       $str String to be quote exploded
     * @param string|array $d   Delimeter(s) which explode the string
     *
     * @return array
     */
    public function quoteExplode($str, $d = ' ')
    {
        if (is_string($d)) {
            $d = array($d);
        }

        // According to the testcases quoteExplode is case insensitive
        $regexp = $this->getSplitRegExpFromArray($d) . 'i';
        $terms  = $this->clauseExplodeCountBrackets($str, $regexp);

        $res = array();

        foreach ($terms as $val) {
            $res[] = trim($val[0]);
        }

        return $res;
    }

    /**
     * Explodes a string into array using custom brackets and
     * quote delimeters
     *
     * Quoted strings keep their quotes, and bracketed expressions are kept
     * together as one element. Matching is case sensitive.
     *
     * Note: sqlExplode trims all returned parts
     *
     * example:
     *
     * parameters:
     *     $str = "(age < 20 AND age > 18) AND name LIKE 'John Doe'"
     *     $d   = ' '
     *     $e1  = '('
     *     $e2  = ')'
     *
     * would return an array:
     *     array(
     *         '(age < 20 AND age > 18)',
     *         'AND',
     *         'name',
     *         'LIKE',
     *         "'John Doe'"
     *     );
     *
     * @param string       $str String to be SQL exploded
     * @param string|array $d   Delimeter(s) which explode the string
     * @param string       $e1  First bracket, usually '('
     * @param string       $e2  Second bracket, usually ')'
     *
     * @return array
     */
    public function sqlExplode($str, $d = ' ', $e1 = '(', $e2 = ')')
    {
        if (is_string($d)) {
            $d = array($d);
        }

        $terms = $this->clauseExplode($str, $d, $e1, $e2);
        $res   = array();

        foreach ($terms as $value) {
            $res[] = trim($value[0]);
        }

        return $res;
    }

    /**
     * Explodes a string into array using custom brackets and quote delimeters
     * Each array element is a array of length 2 where the first entry contains
     * the term, and the second entry contains the corresponding delimiter
     * (an empty string for the last term). Quoted strings keep their quotes.
     *
     * example:
     *
     * parameters:
     *     $str = "(age < 20 AND age > 18) AND name LIKE 'John'+' Doe'"
     *     $d   = array(' ', '+')
     *     $e1  = '('
     *     $e2  = ')'
     *
     * would return an array:
     *     array(
     *         array('(age < 20 AND age > 18)', ' '),
     *         array('AND', ' '),
     *         array('name', ' '),
     *         array('LIKE', ' '),
     *         array("'John'", '+'),
     *         array("' Doe'", '')
     *     );
     *
     * @param string $str String to be clause exploded
     * @param array  $d   Delimeters which explode the string
     * @param string $e1  First bracket, usually '('
     * @param string $e2  Second bracket, usually ')'
     *
     * @return array
     */
    public function clauseExplode($str, array $d, $e1 = '(', $e2 = ')')
    {
        $regexp = $this->getSplitRegExpFromArray($d);

        return $this->clauseExplodeRegExp($str, $regexp, $e1, $e2);
    }

    /**
     * Builds regular expression for split from array. Return regular
     * expression to be applied
     *
     * Every delimiter is escaped for use inside a '#'-delimited pattern.
     * A delimiter consisting only of word characters (e.g. "AND") is wrapped
     * in \W on both sides so that it only matches as a separate word. When a
     * single space is among the delimiters, any whitespace character (\s)
     * splits as well. The pattern has one capturing group around the whole
     * alternation and no modifiers; callers may append them.
     *
     * @param array $d Delimiter strings
     *
     * @return string
     */
    private function getSplitRegExpFromArray(array $d)
    {
        foreach ($d as $key => $string) {
            // Pass the pattern delimiter explicitly: preg_quote() only
            // escapes '#' on its own since PHP 7.3.
            $escapedString = preg_quote($string, '#');
            if (preg_match('#^\w+$#', $string)) {
                $escapedString = '\W' . $escapedString . '\W';
            }
            $d[$key] = $escapedString;
        }

        if (in_array(' ', $d, true)) {
            $d[] = '\s';
        }

        return '#(' . implode('|', $d) . ')#';
    }

    /**
     * Same as clauseExplode, but you give a regexp, which splits the string
     *
     * @param string $str    String to be exploded
     * @param string $regexp Complete split pattern with one capturing group
     *                       around the delimiter
     * @param string $e1     First bracket, usually '('
     * @param string $e2     Second bracket, usually ')'
     *
     * @return array List of array(term, delimiter) pairs
     */
    private function clauseExplodeRegExp($str, $regexp, $e1 = '(', $e2 = ')')
    {
        $terms = $this->clauseExplodeCountBrackets($str, $regexp, $e1, $e2);
        $terms = $this->mergeBracketTerms($terms);

        // This is only here to comply with the old function signature:
        // drop the bracket counter so that every term is array(term, delimiter)
        foreach (array_keys($terms) as $key) {
            unset($terms[$key][2]);
        }

        return $terms;
    }

    /**
     * this function is like clauseExplode, but it doesn't merge bracket terms
     *
     * Quoted strings are never split and brackets inside them are not
     * counted. Each returned term is an array with the following elements:
     *     [0] = the term itself
     *     [1] = the delimiter splitting this term from the next
     *     [2] = number of opening brackets minus number of closing brackets
     *           found in the non-quoted text of this term
     *
     * @param string $str    String to be exploded
     * @param string $regexp Complete split pattern with one capturing group
     *                       around the delimiter
     * @param string $e1     First bracket, usually '('
     * @param string $e2     Second bracket, usually ')'
     *
     * @return array
     */
    private function clauseExplodeCountBrackets($str, $regexp, $e1 = '(', $e2 = ')')
    {
        $quoteTerms = $this->quotedStringExplode($str);
        $terms      = array();
        $i          = 0; // always equals the number of entries in $terms

        foreach ($quoteTerms as $key => $val) {
            if ($key & 1) { // a quoted string
                // If the last term had no ending delimiter, we append the string to the element,
                // otherwise, we create a new element without delimiter
                if ($i > 0 && $terms[$i - 1][1] === '') {
                    $terms[$i - 1][0] .= $val;
                } else {
                    $terms[$i++] = array($val, '', 0);
                }
            } else { // Not a quoted string
                // Do the clause explode
                $subterms = $this->clauseExplodeNonQuoted($val, $regexp);

                foreach ($subterms as $subKey => $sub) {
                    $c1 = substr_count($sub[0], $e1);
                    $c2 = substr_count($sub[0], $e2);

                    $subterms[$subKey][2] = $c1 - $c2;
                }

                // If the previous term had no delimiter, merge them
                if ($i > 0 && $terms[$i - 1][1] === '') {
                    $first = array_shift($subterms);
                    $idx   = $i - 1;

                    $terms[$idx][0] .= $first[0];
                    $terms[$idx][1]  = $first[1];
                    $terms[$idx][2] += $first[2];
                }

                $terms = array_merge($terms, $subterms);
                $i    += count($subterms);
            }
        }

        return $terms;
    }

    /**
     * Explodes a string by the given split pattern. This function doesn't
     * respect quoted strings and doesn't look at brackets.
     * The returned array contains a array per term. These term array contain
     * the following elements:
     *     [0] = the term itself
     *     [1] = the delimiter splitting this term from the next
     *           (empty string for the last term)
     *
     * example:
     *
     * parameters:
     *     $str    = "a (b '(c+d))'"
     *     $regexp = pattern built from array(' ', '+')
     *
     * returns:
     *     array(
     *         array('a', ' '),
     *         array('(b', ' '),
     *         array("'(c", '+'),
     *         array("d))'", '')
     *     );
     *
     * @param string $str    String to be exploded
     * @param string $regexp Complete split pattern with one capturing group
     *                       around the delimiter
     *
     * @return array
     */
    private function clauseExplodeNonQuoted($str, $regexp)
    {
        $pieces = preg_split($regexp, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
        $term   = array();
        $i      = 0;

        foreach ($pieces as $key => $val) {
            // Every odd entry is a delimiter, so add it to the previous term entry
            if ($key & 1) {
                $term[$i++][1] = $val;
            } else {
                $term[$i] = array($val, '');
            }
        }

        return $term;
    }

    /**
     * This expects input from clauseExplodeCountBrackets.
     * It will go through the result and merges any bracket terms with
     * unbalanced bracket count.
     * Note that only the third parameter in each term is used to get the
     * bracket overhang. This is needed to be able to handle quoted strings
     * which contain brackets
     *
     * example:
     *
     * parameters:
     *     $terms = array(
     *         array("'a(b'", '+', 0)
     *         array('(2', '+', 1),
     *         array('3)', '-', -1),
     *         array('5', '' , '0')
     *     );
     *
     * would return:
     *     array(
     *         array("'a(b'", '+', 0),
     *         array('(2+3)', '-', 0),
     *         array('5'    , '' , 0)
     *     );
     *
     * @param array $terms Terms of the form array(term, delimiter, overhang)
     *
     * @return array
     */
    private function mergeBracketTerms(array $terms)
    {
        $res = array();
        $i   = 0;

        foreach ($terms as $val) {
            if ( ! isset($res[$i])) {
                $res[$i] = array($val[0], $val[1], $val[2]);
            } else {
                // Glue this term onto the open one, re-inserting the
                // delimiter that originally separated them
                $res[$i][0] .= $res[$i][1] . $val[0];
                $res[$i][1]  = $val[1];
                $res[$i][2] += $val[2];
            }

            // Only move on once the brackets of the current term are balanced
            if ($res[$i][2] == 0) {
                $i++;
            }
        }

        return $res;
    }


    /**
     * Explodes the given string by <quoted words>
     *
     * Both ' and " open a quoted string, which is closed by the same
     * character. Backslash-escaped quotes and doubled quotes do not
     * open or close a string.
     *
     * example:
     *
     * parameters:
     *     $str = "'a' AND name = 'John O\'Connor'"
     *
     * returns
     *     array("", "'a'", " AND name = ", "'John O\'Connor'", "")
     *
     * Note the leading and trailing empty strings. In the result, all odd
     * elements are quoted strings (including their quotes) and all even
     * elements are the text between them.
     *
     * @param string $str the string to split
     *
     * @return array
     */
    public function quotedStringExplode($str)
    {
        // Split by all possible incarnations of a quote
        $quotes = array("\\'", "''", "'", "\\\"", "\"\"", "\"");
        $split  = array();
        foreach ($quotes as $quote) {
            $split[] = preg_quote($quote, '#');
        }
        $regexp = '#(' . implode('|', $split) . ')#';
        $pieces = preg_split($regexp, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $parts = array();
        $mode  = false; // Mode is either ' or " if the loop is inside a string quoted with ' or "
        $i     = 0;

        foreach ($pieces as $key => $val) {
            // This is some kind of quote
            if ($key & 1) {
                if ( ! $mode) {
                    // Only a plain quote opens a string; it becomes the
                    // first character of a new part (appended below)
                    if ($val == "'" || $val == "\"") {
                        $mode = $val;
                        $i++;
                    }
                } else if ($mode == $val) {
                    // Matching closing quote: finish the quoted part
                    if ( ! isset($parts[$i])) {
                        $parts[$i] = $val;
                    } else {
                        $parts[$i] .= $val;
                    }

                    $mode = false;
                    $i++;

                    continue;
                }
            }

            if ( ! isset($parts[$i])) {
                $parts[$i] = $val;
            } else {
                $parts[$i] .= $val;
            }
        }

        return $parts;
    }
}