1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\CssSelector\Parser;
13
14use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
15use Symfony\Component\CssSelector\Node;
16use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;
17
18/**
19 * CSS selector parser.
20 *
21 * This component is a port of the Python cssselect library,
22 * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
23 *
24 * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
25 *
26 * @internal
27 */
class Parser implements ParserInterface
{
    /**
     * Tokenizer used to turn a reader over the source into a token stream.
     *
     * @var Tokenizer
     */
    private $tokenizer;

    /**
     * @param Tokenizer|null $tokenizer Tokenizer to use; a new Tokenizer is created when none is given
     */
    public function __construct(Tokenizer $tokenizer = null)
    {
        $this->tokenizer = $tokenizer ?: new Tokenizer();
    }

    /**
     * {@inheritdoc}
     *
     * Wraps the source in a Reader, tokenizes it and parses the resulting
     * token stream as a comma-separated list of selectors.
     */
    public function parse($source)
    {
        $reader = new Reader($source);
        $stream = $this->tokenizer->tokenize($reader);

        return $this->parseSelectorList($stream);
    }

    /**
     * Parses the arguments for ":nth-child()" and friends.
     *
     * The token values are concatenated and interpreted as an "an+b" series.
     * The result is the pair array(a, b), for example:
     *
     *  - "odd"  => array(2, 1)
     *  - "even" => array(2, 0)
     *  - "n"    => array(1, 0)
     *  - "3"    => array(0, 3)
     *  - "-n+2" => array(-1, 2)
     *  - "0n+5" => array(0, 5)
     *
     * @param Token[] $tokens
     *
     * @return array The two integers array(a, b)
     *
     * @throws SyntaxErrorException When a token is a string, or when a part of
     *                              the series is not numeric
     */
    public static function parseSeries(array $tokens)
    {
        foreach ($tokens as $token) {
            if ($token->isString()) {
                throw SyntaxErrorException::stringAsFunctionArgument();
            }
        }

        $joined = trim(implode('', array_map(function (Token $token) {
            return $token->getValue();
        }, $tokens)));

        // Converts a numeric string to an integer, rejecting anything else.
        $int = function ($string) {
            if (!is_numeric($string)) {
                throw SyntaxErrorException::stringAsFunctionArgument();
            }

            return (int) $string;
        };

        switch (true) {
            case 'odd' === $joined:
                return array(2, 1);
            case 'even' === $joined:
                return array(2, 0);
            case 'n' === $joined:
                return array(1, 0);
            case false === strpos($joined, 'n'):
                // No "n" at all: the whole expression is the "b" part.
                return array(0, $int($joined));
        }

        // $joined contains at least one "n" here, so a limit of 2 always yields
        // exactly two parts. Splitting only on the first "n" keeps any further
        // "n" in the second part, where it fails the numeric check instead of
        // being silently dropped (e.g. "2n+1n").
        list($first, $second) = explode('n', $joined, 2);

        // Compare against the empty string explicitly: the string "0" is falsy
        // in PHP, and "0n+5" must yield a = 0, not the default of 1.
        if ('' === $first) {
            $a = 1;
        } elseif ('-' === $first || '+' === $first) {
            // A bare sign means a coefficient of -1 or +1.
            $a = $int($first.'1');
        } else {
            $a = $int($first);
        }

        $b = '' === $second ? 0 : $int($second);

        return array($a, $b);
    }

    /**
     * Parses a comma-separated list of selectors.
     *
     * Leading whitespace is skipped, then one selector is parsed per
     * ","-delimited group until the next token is no longer a comma.
     *
     * @return Node\SelectorNode[]
     */
    private function parseSelectorList(TokenStream $stream)
    {
        $stream->skipWhitespace();
        $selectors = array();

        while (true) {
            $selectors[] = $this->parseSelectorNode($stream);

            if ($stream->getPeek()->isDelimiter(array(','))) {
                // Consume the comma and any whitespace before the next selector.
                $stream->getNext();
                $stream->skipWhitespace();
            } else {
                break;
            }
        }

        return $selectors;
    }

    /**
     * Parses next selector or combined node.
     *
     * Simple selectors are read one after another and chained into
     * CombinedSelectorNode instances until the end of the stream or a ","
     * is reached. A pseudo-element is only accepted on the last simple selector.
     *
     * @return Node\SelectorNode
     *
     * @throws SyntaxErrorException When a pseudo-element is followed by another simple selector
     */
    private function parseSelectorNode(TokenStream $stream)
    {
        list($result, $pseudoElement) = $this->parseSimpleSelector($stream);

        while (true) {
            $stream->skipWhitespace();
            $peek = $stream->getPeek();

            if ($peek->isFileEnd() || $peek->isDelimiter(array(','))) {
                break;
            }

            // Something follows the previous simple selector, so it must not
            // have ended with a pseudo-element.
            if (null !== $pseudoElement) {
                throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
            }

            if ($peek->isDelimiter(array('+', '>', '~'))) {
                $combinator = $stream->getNext()->getValue();
                $stream->skipWhitespace();
            } else {
                // No explicit combinator: the (already skipped) whitespace is
                // represented by the ' ' combinator.
                $combinator = ' ';
            }

            list($nextSelector, $pseudoElement) = $this->parseSimpleSelector($stream);
            $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
        }

        return new Node\SelectorNode($result, $pseudoElement);
    }

    /**
     * Parses next simple node (hash, class, pseudo, negation).
     *
     * Starts with an element node, then wraps it with every hash, class,
     * attribute, pseudo-class, function or negation that directly follows.
     *
     * @param TokenStream $stream
     * @param bool        $insideNegation Whether the selector is the argument of ":not()";
     *                                    a ")" then also terminates the selector
     *
     * @return array The pair array(node, pseudo-element identifier or null)
     *
     * @throws SyntaxErrorException
     */
    private function parseSimpleSelector(TokenStream $stream, $insideNegation = false)
    {
        $stream->skipWhitespace();

        // Remember the used-token count reported by the stream so that an
        // empty selector can be detected after the loop.
        $selectorStart = count($stream->getUsed());
        $result = $this->parseElementNode($stream);
        $pseudoElement = null;

        while (true) {
            $peek = $stream->getPeek();
            if ($peek->isWhitespace()
                || $peek->isFileEnd()
                || $peek->isDelimiter(array(',', '+', '>', '~'))
                || ($insideNegation && $peek->isDelimiter(array(')')))
            ) {
                break;
            }

            // Nothing may follow a pseudo-element within a simple selector.
            if (null !== $pseudoElement) {
                throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
            }

            if ($peek->isHash()) {
                $result = new Node\HashNode($result, $stream->getNext()->getValue());
            } elseif ($peek->isDelimiter(array('.'))) {
                $stream->getNext();
                $result = new Node\ClassNode($result, $stream->getNextIdentifier());
            } elseif ($peek->isDelimiter(array('['))) {
                $stream->getNext();
                $result = $this->parseAttributeNode($result, $stream);
            } elseif ($peek->isDelimiter(array(':'))) {
                $stream->getNext();

                // "::name" is a pseudo-element.
                if ($stream->getPeek()->isDelimiter(array(':'))) {
                    $stream->getNext();
                    $pseudoElement = $stream->getNextIdentifier();

                    continue;
                }

                $identifier = $stream->getNextIdentifier();
                if (in_array(strtolower($identifier), array('first-line', 'first-letter', 'before', 'after'), true)) {
                    // Special case: CSS 2.1 pseudo-elements can have a single ':'.
                    // Any new pseudo-element must have two.
                    $pseudoElement = $identifier;

                    continue;
                }

                // No opening parenthesis: plain pseudo-class.
                if (!$stream->getPeek()->isDelimiter(array('('))) {
                    $result = new Node\PseudoNode($result, $identifier);

                    continue;
                }

                // Consume "(" and any whitespace before the first argument.
                $stream->getNext();
                $stream->skipWhitespace();

                if ('not' === strtolower($identifier)) {
                    if ($insideNegation) {
                        throw SyntaxErrorException::nestedNot();
                    }

                    list($argument, $argumentPseudoElement) = $this->parseSimpleSelector($stream, true);
                    $next = $stream->getNext();

                    if (null !== $argumentPseudoElement) {
                        throw SyntaxErrorException::pseudoElementFound($argumentPseudoElement, 'inside ::not()');
                    }

                    if (!$next->isDelimiter(array(')'))) {
                        throw SyntaxErrorException::unexpectedToken('")"', $next);
                    }

                    $result = new Node\NegationNode($result, $argument);
                } else {
                    $arguments = array();
                    $next = null;

                    // Collect argument tokens up to the closing ")".
                    while (true) {
                        $stream->skipWhitespace();
                        $next = $stream->getNext();

                        if ($next->isIdentifier()
                            || $next->isString()
                            || $next->isNumber()
                            || $next->isDelimiter(array('+', '-'))
                        ) {
                            $arguments[] = $next;
                        } elseif ($next->isDelimiter(array(')'))) {
                            break;
                        } else {
                            throw SyntaxErrorException::unexpectedToken('an argument', $next);
                        }
                    }

                    if (empty($arguments)) {
                        throw SyntaxErrorException::unexpectedToken('at least one argument', $next);
                    }

                    $result = new Node\FunctionNode($result, $identifier, $arguments);
                }
            } else {
                throw SyntaxErrorException::unexpectedToken('selector', $peek);
            }
        }

        // The used-token count did not change: no selector was found here.
        if (count($stream->getUsed()) === $selectorStart) {
            throw SyntaxErrorException::unexpectedToken('selector', $stream->getPeek());
        }

        return array($result, $pseudoElement);
    }

    /**
     * Parses next element node.
     *
     * Accepts "name", "*", "namespace|name" and "*|name" forms. When the next
     * token is neither an identifier nor "*", nothing is consumed and an
     * element node with null namespace and null element is returned.
     *
     * @return Node\ElementNode
     */
    private function parseElementNode(TokenStream $stream)
    {
        $peek = $stream->getPeek();

        if ($peek->isIdentifier() || $peek->isDelimiter(array('*'))) {
            // Provisionally treat the first token as the namespace; "*" maps to null.
            if ($peek->isIdentifier()) {
                $namespace = $stream->getNext()->getValue();
            } else {
                $stream->getNext();
                $namespace = null;
            }

            if ($stream->getPeek()->isDelimiter(array('|'))) {
                $stream->getNext();
                $element = $stream->getNextIdentifierOrStar();
            } else {
                // No "|" follows: the first token was the element name after all.
                $element = $namespace;
                $namespace = null;
            }
        } else {
            $element = $namespace = null;
        }

        return new Node\ElementNode($namespace, $element);
    }

    /**
     * Parses next attribute node.
     *
     * Expects the stream to be positioned right after the opening "[" and
     * consumes tokens up to and including the closing "]".
     *
     * @param Node\NodeInterface $selector Node the attribute condition applies to
     * @param TokenStream        $stream
     *
     * @return Node\AttributeNode
     *
     * @throws SyntaxErrorException
     */
    private function parseAttributeNode(Node\NodeInterface $selector, TokenStream $stream)
    {
        $stream->skipWhitespace();
        $attribute = $stream->getNextIdentifierOrStar();

        // A null attribute (as handled here, the "*" case) is only valid as a
        // namespace prefix, so it must be followed by "|".
        if (null === $attribute && !$stream->getPeek()->isDelimiter(array('|'))) {
            throw SyntaxErrorException::unexpectedToken('"|"', $stream->getPeek());
        }

        if ($stream->getPeek()->isDelimiter(array('|'))) {
            $stream->getNext();

            if ($stream->getPeek()->isDelimiter(array('='))) {
                // "|" directly followed by "=" is the "|=" operator, not a
                // namespace separator.
                $namespace = null;
                $stream->getNext();
                $operator = '|=';
            } else {
                $namespace = $attribute;
                $attribute = $stream->getNextIdentifier();
                $operator = null;
            }
        } else {
            $namespace = $operator = null;
        }

        if (null === $operator) {
            $stream->skipWhitespace();
            $next = $stream->getNext();

            if ($next->isDelimiter(array(']'))) {
                // "[attr]": existence test only, no operator or value.
                return new Node\AttributeNode($selector, $namespace, $attribute, 'exists', null);
            } elseif ($next->isDelimiter(array('='))) {
                $operator = '=';
            } elseif ($next->isDelimiter(array('^', '$', '*', '~', '|', '!'))
                && $stream->getPeek()->isDelimiter(array('='))
            ) {
                // Two-character operator such as "^=" or "!=".
                $operator = $next->getValue().'=';
                $stream->getNext();
            } else {
                throw SyntaxErrorException::unexpectedToken('operator', $next);
            }
        }

        $stream->skipWhitespace();
        $value = $stream->getNext();

        if ($value->isNumber()) {
            // if the value is a number, it's casted into a string
            $value = new Token(Token::TYPE_STRING, (string) $value->getValue(), $value->getPosition());
        }

        if (!($value->isIdentifier() || $value->isString())) {
            throw SyntaxErrorException::unexpectedToken('string or identifier', $value);
        }

        $stream->skipWhitespace();
        $next = $stream->getNext();

        if (!$next->isDelimiter(array(']'))) {
            throw SyntaxErrorException::unexpectedToken('"]"', $next);
        }

        return new Node\AttributeNode($selector, $namespace, $attribute, $operator, $value->getValue());
    }
}
385