<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\CssSelector\Parser;

use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
use Symfony\Component\CssSelector\Node;
use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;

/**
 * CSS selector parser.
 *
 * Turns a CSS selector source string into a list of selector nodes: the
 * source is tokenized first, then the token stream is consumed by a set of
 * recursive-descent style private methods (selector list -> selector ->
 * simple selector -> element / attribute).
 *
 * This component is a port of the Python cssselect library,
 * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
 *
 * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
 *
 * @internal
 */
class Parser implements ParserInterface
{
    private $tokenizer;

    /**
     * @param Tokenizer|null $tokenizer Tokenizer to use; a default Tokenizer is created when omitted
     */
    public function __construct(Tokenizer $tokenizer = null)
    {
        $this->tokenizer = $tokenizer ?: new Tokenizer();
    }

    /**
     * {@inheritdoc}
     */
    public function parse($source)
    {
        $reader = new Reader($source);
        $stream = $this->tokenizer->tokenize($reader);

        return $this->parseSelectorList($stream);
    }

    /**
     * Parses the arguments for ":nth-child()" and friends.
     *
     * The token values are concatenated and interpreted as an "an+b"
     * expression. The result is the pair array(a, b):
     *
     *  - "odd"  gives array(2, 1)
     *  - "even" gives array(2, 0)
     *  - "n"    gives array(1, 0)
     *  - a bare integer "b" gives array(0, b)
     *  - otherwise the text before the first "n" is a (empty means 1, a lone
     *    sign means +1/-1) and the text after it is b (empty means 0)
     *
     * @param Token[] $tokens
     *
     * @return array
     *
     * @throws SyntaxErrorException When a token is a string or a coefficient is not numeric
     */
    public static function parseSeries(array $tokens)
    {
        foreach ($tokens as $token) {
            if ($token->isString()) {
                throw SyntaxErrorException::stringAsFunctionArgument();
            }
        }

        $joined = trim(implode('', array_map(function (Token $token) {
            return $token->getValue();
        }, $tokens)));

        $int = function ($string) {
            if (!is_numeric($string)) {
                throw SyntaxErrorException::stringAsFunctionArgument();
            }

            return (int) $string;
        };

        switch (true) {
            case 'odd' === $joined:
                return array(2, 1);
            case 'even' === $joined:
                return array(2, 0);
            case 'n' === $joined:
                return array(1, 0);
            case false === strpos($joined, 'n'):
                return array(0, $int($joined));
        }

        // An "n" is guaranteed to be present here, so exactly two parts come
        // back. The limit keeps any further "n" inside $offset, where the
        // numeric check rejects it instead of it being silently dropped.
        list($step, $offset) = explode('n', $joined, 2);

        // Compare against '' explicitly: the string "0" is falsy in PHP, so a
        // truthiness test would turn "0n+5" into a step of 1 instead of 0.
        if ('' === $step) {
            $step = 1;
        } elseif ('-' === $step || '+' === $step) {
            $step = $int($step.'1');
        } else {
            $step = $int($step);
        }

        return array($step, '' === $offset ? 0 : $int($offset));
    }

    /**
     * Parses selector nodes.
     *
     * Reads one selector, then keeps reading further selectors for as long as
     * the next token is a "," delimiter.
     *
     * @return array
     */
    private function parseSelectorList(TokenStream $stream)
    {
        $stream->skipWhitespace();
        $selectors = array();

        while (true) {
            $selectors[] = $this->parserSelectorNode($stream);

            if ($stream->getPeek()->isDelimiter(array(','))) {
                $stream->getNext();
                $stream->skipWhitespace();
            } else {
                break;
            }
        }

        return $selectors;
    }

    /**
     * Parses next selector or combined node.
     *
     * Simple selectors are chained left to right into CombinedSelectorNode
     * instances until the end of the input or a "," is reached.
     *
     * @return Node\SelectorNode
     *
     * @throws SyntaxErrorException When a pseudo-element is followed by another simple selector
     */
    private function parserSelectorNode(TokenStream $stream)
    {
        list($result, $pseudoElement) = $this->parseSimpleSelector($stream);

        while (true) {
            $stream->skipWhitespace();
            $peek = $stream->getPeek();

            if ($peek->isFileEnd() || $peek->isDelimiter(array(','))) {
                break;
            }

            // Something follows the previous simple selector, so a
            // pseudo-element found on it was not in last position.
            if (null !== $pseudoElement) {
                throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
            }

            if ($peek->isDelimiter(array('+', '>', '~'))) {
                $combinator = $stream->getNext()->getValue();
                $stream->skipWhitespace();
            } else {
                // No explicit combinator token: use the ' ' combinator.
                $combinator = ' ';
            }

            list($nextSelector, $pseudoElement) = $this->parseSimpleSelector($stream);
            $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
        }

        return new Node\SelectorNode($result, $pseudoElement);
    }

    /**
     * Parses next simple node (hash, class, pseudo, negation).
     *
     * Starts from an element node and wraps it with every hash, class,
     * attribute, pseudo-class, function or negation that directly follows.
     *
     * @param TokenStream $stream
     * @param bool        $insideNegation Whether the selector is the argument of ":not()";
     *                                    a ")" then ends the selector and nested ":not()" is refused
     *
     * @return array The pair array(node, pseudo-element name or null)
     *
     * @throws SyntaxErrorException
     */
    private function parseSimpleSelector(TokenStream $stream, $insideNegation = false)
    {
        $stream->skipWhitespace();

        // Remember how many tokens were consumed so far, to detect below
        // whether this call consumed anything at all.
        $selectorStart = count($stream->getUsed());
        $result = $this->parseElementNode($stream);
        $pseudoElement = null;

        while (true) {
            $peek = $stream->getPeek();
            if ($peek->isWhitespace()
                || $peek->isFileEnd()
                || $peek->isDelimiter(array(',', '+', '>', '~'))
                || ($insideNegation && $peek->isDelimiter(array(')')))
            ) {
                break;
            }

            if (null !== $pseudoElement) {
                throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
            }

            if ($peek->isHash()) {
                $result = new Node\HashNode($result, $stream->getNext()->getValue());
            } elseif ($peek->isDelimiter(array('.'))) {
                $stream->getNext();
                $result = new Node\ClassNode($result, $stream->getNextIdentifier());
            } elseif ($peek->isDelimiter(array('['))) {
                $stream->getNext();
                $result = $this->parseAttributeNode($result, $stream);
            } elseif ($peek->isDelimiter(array(':'))) {
                $stream->getNext();

                // A second ':' means a pseudo-element ("::name").
                if ($stream->getPeek()->isDelimiter(array(':'))) {
                    $stream->getNext();
                    $pseudoElement = $stream->getNextIdentifier();

                    continue;
                }

                $identifier = $stream->getNextIdentifier();
                if (in_array(strtolower($identifier), array('first-line', 'first-letter', 'before', 'after'), true)) {
                    // Special case: CSS 2.1 pseudo-elements can have a single ':'.
                    // Any new pseudo-element must have two.
                    $pseudoElement = $identifier;

                    continue;
                }

                // No opening parenthesis: plain pseudo-class.
                if (!$stream->getPeek()->isDelimiter(array('('))) {
                    $result = new Node\PseudoNode($result, $identifier);

                    continue;
                }

                $stream->getNext();
                $stream->skipWhitespace();

                if ('not' === strtolower($identifier)) {
                    if ($insideNegation) {
                        throw SyntaxErrorException::nestedNot();
                    }

                    list($argument, $argumentPseudoElement) = $this->parseSimpleSelector($stream, true);
                    $next = $stream->getNext();

                    if (null !== $argumentPseudoElement) {
                        throw SyntaxErrorException::pseudoElementFound($argumentPseudoElement, 'inside ::not()');
                    }

                    if (!$next->isDelimiter(array(')'))) {
                        throw SyntaxErrorException::unexpectedToken('")"', $next);
                    }

                    $result = new Node\NegationNode($result, $argument);
                } else {
                    // Generic function: collect argument tokens up to the closing ")".
                    $arguments = array();
                    $next = null;

                    while (true) {
                        $stream->skipWhitespace();
                        $next = $stream->getNext();

                        if ($next->isIdentifier()
                            || $next->isString()
                            || $next->isNumber()
                            || $next->isDelimiter(array('+', '-'))
                        ) {
                            $arguments[] = $next;
                        } elseif ($next->isDelimiter(array(')'))) {
                            break;
                        } else {
                            throw SyntaxErrorException::unexpectedToken('an argument', $next);
                        }
                    }

                    if (empty($arguments)) {
                        throw SyntaxErrorException::unexpectedToken('at least one argument', $next);
                    }

                    $result = new Node\FunctionNode($result, $identifier, $arguments);
                }
            } else {
                throw SyntaxErrorException::unexpectedToken('selector', $peek);
            }
        }

        // Nothing was consumed: there is no selector at this position.
        if (count($stream->getUsed()) === $selectorStart) {
            throw SyntaxErrorException::unexpectedToken('selector', $stream->getPeek());
        }

        return array($result, $pseudoElement);
    }

    /**
     * Parses next element node.
     *
     * Handles "element", "*", "namespace|element" and "*|element". When the
     * next token is neither an identifier nor "*", nothing is consumed and an
     * element node with null namespace and null element is returned.
     *
     * @return Node\ElementNode
     */
    private function parseElementNode(TokenStream $stream)
    {
        $peek = $stream->getPeek();

        if ($peek->isIdentifier() || $peek->isDelimiter(array('*'))) {
            // The first token is tentatively taken as the namespace part.
            if ($peek->isIdentifier()) {
                $namespace = $stream->getNext()->getValue();
            } else {
                $stream->getNext();
                $namespace = null;
            }

            if ($stream->getPeek()->isDelimiter(array('|'))) {
                $stream->getNext();
                $element = $stream->getNextIdentifierOrStar();
            } else {
                // No "|" follows: what was read is the element name itself.
                $element = $namespace;
                $namespace = null;
            }
        } else {
            $element = $namespace = null;
        }

        return new Node\ElementNode($namespace, $element);
    }

    /**
     * Parses next attribute node.
     *
     * Expects the stream to be positioned right after the opening "[" and
     * consumes tokens up to and including the closing "]".
     *
     * @param Node\NodeInterface $selector Node the attribute test applies to
     * @param TokenStream        $stream
     *
     * @return Node\AttributeNode
     *
     * @throws SyntaxErrorException
     */
    private function parseAttributeNode(Node\NodeInterface $selector, TokenStream $stream)
    {
        $stream->skipWhitespace();
        $attribute = $stream->getNextIdentifierOrStar();

        // A null name (presumably what getNextIdentifierOrStar() gives for "*")
        // is only accepted when a "|" follows.
        if (null === $attribute && !$stream->getPeek()->isDelimiter(array('|'))) {
            throw SyntaxErrorException::unexpectedToken('"|"', $stream->getPeek());
        }

        if ($stream->getPeek()->isDelimiter(array('|'))) {
            $stream->getNext();

            if ($stream->getPeek()->isDelimiter(array('='))) {
                // "|" directly followed by "=" is the "|=" operator, not a namespace separator.
                $namespace = null;
                $stream->getNext();
                $operator = '|=';
            } else {
                $namespace = $attribute;
                $attribute = $stream->getNextIdentifier();
                $operator = null;
            }
        } else {
            $namespace = $operator = null;
        }

        if (null === $operator) {
            $stream->skipWhitespace();
            $next = $stream->getNext();

            if ($next->isDelimiter(array(']'))) {
                return new Node\AttributeNode($selector, $namespace, $attribute, 'exists', null);
            } elseif ($next->isDelimiter(array('='))) {
                $operator = '=';
            } elseif ($next->isDelimiter(array('^', '$', '*', '~', '|', '!'))
                && $stream->getPeek()->isDelimiter(array('='))
            ) {
                // Two-character operator: the prefix delimiter followed by "=".
                $operator = $next->getValue().'=';
                $stream->getNext();
            } else {
                throw SyntaxErrorException::unexpectedToken('operator', $next);
            }
        }

        $stream->skipWhitespace();
        $value = $stream->getNext();

        if ($value->isNumber()) {
            // if the value is a number, it's casted into a string
            $value = new Token(Token::TYPE_STRING, (string) $value->getValue(), $value->getPosition());
        }

        if (!($value->isIdentifier() || $value->isString())) {
            throw SyntaxErrorException::unexpectedToken('string or identifier', $value);
        }

        $stream->skipWhitespace();
        $next = $stream->getNext();

        if (!$next->isDelimiter(array(']'))) {
            throw SyntaxErrorException::unexpectedToken('"]"', $next);
        }

        return new Node\AttributeNode($selector, $namespace, $attribute, $operator, $value->getValue());
    }
}