1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\ExpressionLanguage;
13
14/**
 * Parses a token stream.
16 *
17 * This parser implements a "Precedence climbing" algorithm.
18 *
19 * @see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
20 * @see http://en.wikipedia.org/wiki/Operator-precedence_parser
21 *
22 * @author Fabien Potencier <fabien@symfony.com>
23 */
24class Parser
25{
26    public const OPERATOR_LEFT = 1;
27    public const OPERATOR_RIGHT = 2;
28
29    private $stream;
30    private $unaryOperators;
31    private $binaryOperators;
32    private $functions;
33    private $names;
34    private $lint;
35
36    public function __construct(array $functions)
37    {
38        $this->functions = $functions;
39
40        $this->unaryOperators = [
41            'not' => ['precedence' => 50],
42            '!' => ['precedence' => 50],
43            '-' => ['precedence' => 500],
44            '+' => ['precedence' => 500],
45        ];
46        $this->binaryOperators = [
47            'or' => ['precedence' => 10, 'associativity' => self::OPERATOR_LEFT],
48            '||' => ['precedence' => 10, 'associativity' => self::OPERATOR_LEFT],
49            'and' => ['precedence' => 15, 'associativity' => self::OPERATOR_LEFT],
50            '&&' => ['precedence' => 15, 'associativity' => self::OPERATOR_LEFT],
51            '|' => ['precedence' => 16, 'associativity' => self::OPERATOR_LEFT],
52            '^' => ['precedence' => 17, 'associativity' => self::OPERATOR_LEFT],
53            '&' => ['precedence' => 18, 'associativity' => self::OPERATOR_LEFT],
54            '==' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
55            '===' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
56            '!=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
57            '!==' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
58            '<' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
59            '>' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
60            '>=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
61            '<=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
62            'not in' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
63            'in' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
64            'matches' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
65            '..' => ['precedence' => 25, 'associativity' => self::OPERATOR_LEFT],
66            '+' => ['precedence' => 30, 'associativity' => self::OPERATOR_LEFT],
67            '-' => ['precedence' => 30, 'associativity' => self::OPERATOR_LEFT],
68            '~' => ['precedence' => 40, 'associativity' => self::OPERATOR_LEFT],
69            '*' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
70            '/' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
71            '%' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
72            '**' => ['precedence' => 200, 'associativity' => self::OPERATOR_RIGHT],
73        ];
74    }
75
76    /**
77     * Converts a token stream to a node tree.
78     *
79     * The valid names is an array where the values
80     * are the names that the user can use in an expression.
81     *
82     * If the variable name in the compiled PHP code must be
83     * different, define it as the key.
84     *
85     * For instance, ['this' => 'container'] means that the
86     * variable 'container' can be used in the expression
87     * but the compiled code will use 'this'.
88     *
89     * @return Node\Node
90     *
91     * @throws SyntaxError
92     */
93    public function parse(TokenStream $stream, array $names = [])
94    {
95        $this->lint = false;
96
97        return $this->doParse($stream, $names);
98    }
99
100    /**
101     * Validates the syntax of an expression.
102     *
103     * The syntax of the passed expression will be checked, but not parsed.
104     * If you want to skip checking dynamic variable names, pass `null` instead of the array.
105     *
106     * @throws SyntaxError When the passed expression is invalid
107     */
108    public function lint(TokenStream $stream, ?array $names = []): void
109    {
110        $this->lint = true;
111        $this->doParse($stream, $names);
112    }
113
114    /**
115     * @throws SyntaxError
116     */
117    private function doParse(TokenStream $stream, ?array $names = []): Node\Node
118    {
119        $this->stream = $stream;
120        $this->names = $names;
121
122        $node = $this->parseExpression();
123        if (!$stream->isEOF()) {
124            throw new SyntaxError(sprintf('Unexpected token "%s" of value "%s".', $stream->current->type, $stream->current->value), $stream->current->cursor, $stream->getExpression());
125        }
126
127        $this->stream = null;
128        $this->names = null;
129
130        return $node;
131    }
132
133    public function parseExpression(int $precedence = 0)
134    {
135        $expr = $this->getPrimary();
136        $token = $this->stream->current;
137        while ($token->test(Token::OPERATOR_TYPE) && isset($this->binaryOperators[$token->value]) && $this->binaryOperators[$token->value]['precedence'] >= $precedence) {
138            $op = $this->binaryOperators[$token->value];
139            $this->stream->next();
140
141            $expr1 = $this->parseExpression(self::OPERATOR_LEFT === $op['associativity'] ? $op['precedence'] + 1 : $op['precedence']);
142            $expr = new Node\BinaryNode($token->value, $expr, $expr1);
143
144            $token = $this->stream->current;
145        }
146
147        if (0 === $precedence) {
148            return $this->parseConditionalExpression($expr);
149        }
150
151        return $expr;
152    }
153
154    protected function getPrimary()
155    {
156        $token = $this->stream->current;
157
158        if ($token->test(Token::OPERATOR_TYPE) && isset($this->unaryOperators[$token->value])) {
159            $operator = $this->unaryOperators[$token->value];
160            $this->stream->next();
161            $expr = $this->parseExpression($operator['precedence']);
162
163            return $this->parsePostfixExpression(new Node\UnaryNode($token->value, $expr));
164        }
165
166        if ($token->test(Token::PUNCTUATION_TYPE, '(')) {
167            $this->stream->next();
168            $expr = $this->parseExpression();
169            $this->stream->expect(Token::PUNCTUATION_TYPE, ')', 'An opened parenthesis is not properly closed');
170
171            return $this->parsePostfixExpression($expr);
172        }
173
174        return $this->parsePrimaryExpression();
175    }
176
177    protected function parseConditionalExpression(Node\Node $expr)
178    {
179        while ($this->stream->current->test(Token::PUNCTUATION_TYPE, '?')) {
180            $this->stream->next();
181            if (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ':')) {
182                $expr2 = $this->parseExpression();
183                if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ':')) {
184                    $this->stream->next();
185                    $expr3 = $this->parseExpression();
186                } else {
187                    $expr3 = new Node\ConstantNode(null);
188                }
189            } else {
190                $this->stream->next();
191                $expr2 = $expr;
192                $expr3 = $this->parseExpression();
193            }
194
195            $expr = new Node\ConditionalNode($expr, $expr2, $expr3);
196        }
197
198        return $expr;
199    }
200
201    public function parsePrimaryExpression()
202    {
203        $token = $this->stream->current;
204        switch ($token->type) {
205            case Token::NAME_TYPE:
206                $this->stream->next();
207                switch ($token->value) {
208                    case 'true':
209                    case 'TRUE':
210                        return new Node\ConstantNode(true);
211
212                    case 'false':
213                    case 'FALSE':
214                        return new Node\ConstantNode(false);
215
216                    case 'null':
217                    case 'NULL':
218                        return new Node\ConstantNode(null);
219
220                    default:
221                        if ('(' === $this->stream->current->value) {
222                            if (false === isset($this->functions[$token->value])) {
223                                throw new SyntaxError(sprintf('The function "%s" does not exist.', $token->value), $token->cursor, $this->stream->getExpression(), $token->value, array_keys($this->functions));
224                            }
225
226                            $node = new Node\FunctionNode($token->value, $this->parseArguments());
227                        } else {
228                            if (!$this->lint || \is_array($this->names)) {
229                                if (!\in_array($token->value, $this->names, true)) {
230                                    throw new SyntaxError(sprintf('Variable "%s" is not valid.', $token->value), $token->cursor, $this->stream->getExpression(), $token->value, $this->names);
231                                }
232
233                                // is the name used in the compiled code different
234                                // from the name used in the expression?
235                                if (\is_int($name = array_search($token->value, $this->names))) {
236                                    $name = $token->value;
237                                }
238                            } else {
239                                $name = $token->value;
240                            }
241
242                            $node = new Node\NameNode($name);
243                        }
244                }
245                break;
246
247            case Token::NUMBER_TYPE:
248            case Token::STRING_TYPE:
249                $this->stream->next();
250
251                return new Node\ConstantNode($token->value);
252
253            default:
254                if ($token->test(Token::PUNCTUATION_TYPE, '[')) {
255                    $node = $this->parseArrayExpression();
256                } elseif ($token->test(Token::PUNCTUATION_TYPE, '{')) {
257                    $node = $this->parseHashExpression();
258                } else {
259                    throw new SyntaxError(sprintf('Unexpected token "%s" of value "%s".', $token->type, $token->value), $token->cursor, $this->stream->getExpression());
260                }
261        }
262
263        return $this->parsePostfixExpression($node);
264    }
265
266    public function parseArrayExpression()
267    {
268        $this->stream->expect(Token::PUNCTUATION_TYPE, '[', 'An array element was expected');
269
270        $node = new Node\ArrayNode();
271        $first = true;
272        while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ']')) {
273            if (!$first) {
274                $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'An array element must be followed by a comma');
275
276                // trailing ,?
277                if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ']')) {
278                    break;
279                }
280            }
281            $first = false;
282
283            $node->addElement($this->parseExpression());
284        }
285        $this->stream->expect(Token::PUNCTUATION_TYPE, ']', 'An opened array is not properly closed');
286
287        return $node;
288    }
289
290    public function parseHashExpression()
291    {
292        $this->stream->expect(Token::PUNCTUATION_TYPE, '{', 'A hash element was expected');
293
294        $node = new Node\ArrayNode();
295        $first = true;
296        while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, '}')) {
297            if (!$first) {
298                $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'A hash value must be followed by a comma');
299
300                // trailing ,?
301                if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '}')) {
302                    break;
303                }
304            }
305            $first = false;
306
307            // a hash key can be:
308            //
309            //  * a number -- 12
310            //  * a string -- 'a'
311            //  * a name, which is equivalent to a string -- a
312            //  * an expression, which must be enclosed in parentheses -- (1 + 2)
313            if ($this->stream->current->test(Token::STRING_TYPE) || $this->stream->current->test(Token::NAME_TYPE) || $this->stream->current->test(Token::NUMBER_TYPE)) {
314                $key = new Node\ConstantNode($this->stream->current->value);
315                $this->stream->next();
316            } elseif ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
317                $key = $this->parseExpression();
318            } else {
319                $current = $this->stream->current;
320
321                throw new SyntaxError(sprintf('A hash key must be a quoted string, a number, a name, or an expression enclosed in parentheses (unexpected token "%s" of value "%s".', $current->type, $current->value), $current->cursor, $this->stream->getExpression());
322            }
323
324            $this->stream->expect(Token::PUNCTUATION_TYPE, ':', 'A hash key must be followed by a colon (:)');
325            $value = $this->parseExpression();
326
327            $node->addElement($value, $key);
328        }
329        $this->stream->expect(Token::PUNCTUATION_TYPE, '}', 'An opened hash is not properly closed');
330
331        return $node;
332    }
333
334    public function parsePostfixExpression(Node\Node $node)
335    {
336        $token = $this->stream->current;
337        while (Token::PUNCTUATION_TYPE == $token->type) {
338            if ('.' === $token->value) {
339                $this->stream->next();
340                $token = $this->stream->current;
341                $this->stream->next();
342
343                if (
344                    Token::NAME_TYPE !== $token->type
345                    &&
346                    // Operators like "not" and "matches" are valid method or property names,
347                    //
348                    // In other words, besides NAME_TYPE, OPERATOR_TYPE could also be parsed as a property or method.
349                    // This is because operators are processed by the lexer prior to names. So "not" in "foo.not()" or "matches" in "foo.matches" will be recognized as an operator first.
350                    // But in fact, "not" and "matches" in such expressions shall be parsed as method or property names.
351                    //
352                    // And this ONLY works if the operator consists of valid characters for a property or method name.
353                    //
354                    // Other types, such as STRING_TYPE and NUMBER_TYPE, can't be parsed as property nor method names.
355                    //
356                    // As a result, if $token is NOT an operator OR $token->value is NOT a valid property or method name, an exception shall be thrown.
357                    (Token::OPERATOR_TYPE !== $token->type || !preg_match('/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A', $token->value))
358                ) {
359                    throw new SyntaxError('Expected name.', $token->cursor, $this->stream->getExpression());
360                }
361
362                $arg = new Node\ConstantNode($token->value, true);
363
364                $arguments = new Node\ArgumentsNode();
365                if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
366                    $type = Node\GetAttrNode::METHOD_CALL;
367                    foreach ($this->parseArguments()->nodes as $n) {
368                        $arguments->addElement($n);
369                    }
370                } else {
371                    $type = Node\GetAttrNode::PROPERTY_CALL;
372                }
373
374                $node = new Node\GetAttrNode($node, $arg, $arguments, $type);
375            } elseif ('[' === $token->value) {
376                $this->stream->next();
377                $arg = $this->parseExpression();
378                $this->stream->expect(Token::PUNCTUATION_TYPE, ']');
379
380                $node = new Node\GetAttrNode($node, $arg, new Node\ArgumentsNode(), Node\GetAttrNode::ARRAY_CALL);
381            } else {
382                break;
383            }
384
385            $token = $this->stream->current;
386        }
387
388        return $node;
389    }
390
391    /**
392     * Parses arguments.
393     */
394    public function parseArguments()
395    {
396        $args = [];
397        $this->stream->expect(Token::PUNCTUATION_TYPE, '(', 'A list of arguments must begin with an opening parenthesis');
398        while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ')')) {
399            if (!empty($args)) {
400                $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'Arguments must be separated by a comma');
401            }
402
403            $args[] = $this->parseExpression();
404        }
405        $this->stream->expect(Token::PUNCTUATION_TYPE, ')', 'A list of arguments must be closed by a parenthesis');
406
407        return new Node\Node($args);
408    }
409}
410