1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\ExpressionLanguage;
13
/**
 * Parses a token stream.
 *
 * This parser implements a "Precedence climbing" algorithm.
 *
 * @see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
 * @see http://en.wikipedia.org/wiki/Operator-precedence_parser
 *
 * @author Fabien Potencier <fabien@symfony.com>
 */
class Parser
{
    const OPERATOR_LEFT = 1;
    const OPERATOR_RIGHT = 2;

    /** @var TokenStream The stream being parsed; assigned by parse() */
    private $stream;

    /** @var array Unary operator => ['precedence' => int] */
    private $unaryOperators;

    /** @var array Binary operator => ['precedence' => int, 'associativity' => self::OPERATOR_*] */
    private $binaryOperators;

    /** @var array Known functions, indexed by function name (only the keys are read here) */
    private $functions;

    /** @var array Names usable in the expression; assigned by parse() */
    private $names;

    /**
     * @param array $functions The available functions, indexed by name
     */
    public function __construct(array $functions)
    {
        $this->functions = $functions;

        $this->unaryOperators = [
            'not' => ['precedence' => 50],
            '!' => ['precedence' => 50],
            '-' => ['precedence' => 500],
            '+' => ['precedence' => 500],
        ];
        $this->binaryOperators = [
            'or' => ['precedence' => 10, 'associativity' => self::OPERATOR_LEFT],
            '||' => ['precedence' => 10, 'associativity' => self::OPERATOR_LEFT],
            'and' => ['precedence' => 15, 'associativity' => self::OPERATOR_LEFT],
            '&&' => ['precedence' => 15, 'associativity' => self::OPERATOR_LEFT],
            '|' => ['precedence' => 16, 'associativity' => self::OPERATOR_LEFT],
            '^' => ['precedence' => 17, 'associativity' => self::OPERATOR_LEFT],
            '&' => ['precedence' => 18, 'associativity' => self::OPERATOR_LEFT],
            '==' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            '===' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            '!=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            '!==' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            '<' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            '>' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            '>=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            '<=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            'not in' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            'in' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            'matches' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
            '..' => ['precedence' => 25, 'associativity' => self::OPERATOR_LEFT],
            '+' => ['precedence' => 30, 'associativity' => self::OPERATOR_LEFT],
            '-' => ['precedence' => 30, 'associativity' => self::OPERATOR_LEFT],
            '~' => ['precedence' => 40, 'associativity' => self::OPERATOR_LEFT],
            '*' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
            '/' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
            '%' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
            '**' => ['precedence' => 200, 'associativity' => self::OPERATOR_RIGHT],
        ];
    }

    /**
     * Converts a token stream to a node tree.
     *
     * The valid names is an array where the values
     * are the names that the user can use in an expression.
     *
     * If the variable name in the compiled PHP code must be
     * different, define it as the key.
     *
     * For instance, ['this' => 'container'] means that the
     * variable 'container' can be used in the expression
     * but the compiled code will use 'this'.
     *
     * @return Node\Node A node tree
     *
     * @throws SyntaxError When tokens remain after a complete expression was parsed,
     *                     or when any nested parsing step rejects the input
     */
    public function parse(TokenStream $stream, array $names = [])
    {
        $this->stream = $stream;
        $this->names = $names;

        $node = $this->parseExpression();
        if (!$stream->isEOF()) {
            throw new SyntaxError(sprintf('Unexpected token "%s" of value "%s".', $stream->current->type, $stream->current->value), $stream->current->cursor, $stream->getExpression());
        }

        return $node;
    }

    /**
     * Parses an expression made of primaries joined by binary operators.
     *
     * Only binary operators whose precedence is at least $precedence are
     * consumed at this level. When called with a precedence of 0, a trailing
     * conditional ("?" / ":") is parsed as well.
     *
     * @param int $precedence The minimum operator precedence to consume
     *
     * @return Node\Node
     */
    public function parseExpression(int $precedence = 0)
    {
        $expr = $this->getPrimary();
        $token = $this->stream->current;
        while ($token->test(Token::OPERATOR_TYPE) && isset($this->binaryOperators[$token->value]) && $this->binaryOperators[$token->value]['precedence'] >= $precedence) {
            $op = $this->binaryOperators[$token->value];
            $this->stream->next();

            // left-associative operators must not swallow another operator of the
            // same precedence on their right-hand side; right-associative ones may
            $expr1 = $this->parseExpression(self::OPERATOR_LEFT === $op['associativity'] ? $op['precedence'] + 1 : $op['precedence']);
            $expr = new Node\BinaryNode($token->value, $expr, $expr1);

            $token = $this->stream->current;
        }

        if (0 === $precedence) {
            return $this->parseConditionalExpression($expr);
        }

        return $expr;
    }

    /**
     * Parses a unary operation, a parenthesized expression or, failing both,
     * a primary expression.
     *
     * @return Node\Node
     */
    protected function getPrimary()
    {
        $token = $this->stream->current;

        if ($token->test(Token::OPERATOR_TYPE) && isset($this->unaryOperators[$token->value])) {
            $operator = $this->unaryOperators[$token->value];
            $this->stream->next();
            $expr = $this->parseExpression($operator['precedence']);

            return $this->parsePostfixExpression(new Node\UnaryNode($token->value, $expr));
        }

        if ($token->test(Token::PUNCTUATION_TYPE, '(')) {
            $this->stream->next();
            $expr = $this->parseExpression();
            $this->stream->expect(Token::PUNCTUATION_TYPE, ')', 'An opened parenthesis is not properly closed');

            return $this->parsePostfixExpression($expr);
        }

        return $this->parsePrimaryExpression();
    }

    /**
     * Parses zero or more conditionals following an already parsed expression.
     *
     * Three forms are handled:
     *
     *  * "a ? b : c"
     *  * "a ? b"   -- the "else" branch is a null constant
     *  * "a ?: c"  -- the condition itself is used as the "then" branch
     *
     * @param Node\Node $expr The condition
     *
     * @return Node\Node The given node when no "?" follows, a conditional node otherwise
     */
    protected function parseConditionalExpression(Node\Node $expr)
    {
        while ($this->stream->current->test(Token::PUNCTUATION_TYPE, '?')) {
            $this->stream->next();
            if (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ':')) {
                $expr2 = $this->parseExpression();
                if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ':')) {
                    $this->stream->next();
                    $expr3 = $this->parseExpression();
                } else {
                    $expr3 = new Node\ConstantNode(null);
                }
            } else {
                $this->stream->next();
                $expr2 = $expr;
                $expr3 = $this->parseExpression();
            }

            $expr = new Node\ConditionalNode($expr, $expr2, $expr3);
        }

        return $expr;
    }

    /**
     * Parses a primary expression: a constant, a function call, a variable
     * name, an array literal or a hash literal.
     *
     * Function calls, names, arrays and hashes may be followed by postfix
     * accessors; boolean, null, number and string constants are returned as is.
     *
     * @return Node\Node
     *
     * @throws SyntaxError When the function or variable is unknown, or when the
     *                     current token cannot start a primary expression
     */
    public function parsePrimaryExpression()
    {
        $token = $this->stream->current;
        switch ($token->type) {
            case Token::NAME_TYPE:
                $this->stream->next();
                switch ($token->value) {
                    case 'true':
                    case 'TRUE':
                        return new Node\ConstantNode(true);

                    case 'false':
                    case 'FALSE':
                        return new Node\ConstantNode(false);

                    case 'null':
                    case 'NULL':
                        return new Node\ConstantNode(null);

                    default:
                        // only an opening parenthesis *punctuation* token starts a call;
                        // checking the type avoids mistaking a "(" string literal for one
                        if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
                            if (false === isset($this->functions[$token->value])) {
                                throw new SyntaxError(sprintf('The function "%s" does not exist.', $token->value), $token->cursor, $this->stream->getExpression(), $token->value, array_keys($this->functions));
                            }

                            $node = new Node\FunctionNode($token->value, $this->parseArguments());
                        } else {
                            if (!\in_array($token->value, $this->names, true)) {
                                throw new SyntaxError(sprintf('Variable "%s" is not valid.', $token->value), $token->cursor, $this->stream->getExpression(), $token->value, $this->names);
                            }

                            // is the name used in the compiled code different
                            // from the name used in the expression?
                            // (strict search, consistent with the in_array() check above,
                            // so a loosely-equal earlier entry cannot be picked instead)
                            if (\is_int($name = array_search($token->value, $this->names, true))) {
                                $name = $token->value;
                            }

                            $node = new Node\NameNode($name);
                        }
                }
                break;

            case Token::NUMBER_TYPE:
            case Token::STRING_TYPE:
                $this->stream->next();

                return new Node\ConstantNode($token->value);

            default:
                if ($token->test(Token::PUNCTUATION_TYPE, '[')) {
                    $node = $this->parseArrayExpression();
                } elseif ($token->test(Token::PUNCTUATION_TYPE, '{')) {
                    $node = $this->parseHashExpression();
                } else {
                    throw new SyntaxError(sprintf('Unexpected token "%s" of value "%s".', $token->type, $token->value), $token->cursor, $this->stream->getExpression());
                }
        }

        return $this->parsePostfixExpression($node);
    }

    /**
     * Parses an array literal: "[" expr ("," expr)* ","? "]".
     *
     * @return Node\ArrayNode
     */
    public function parseArrayExpression()
    {
        $this->stream->expect(Token::PUNCTUATION_TYPE, '[', 'An array element was expected');

        $node = new Node\ArrayNode();
        $first = true;
        while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ']')) {
            if (!$first) {
                $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'An array element must be followed by a comma');

                // trailing ,?
                if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ']')) {
                    break;
                }
            }
            $first = false;

            $node->addElement($this->parseExpression());
        }
        $this->stream->expect(Token::PUNCTUATION_TYPE, ']', 'An opened array is not properly closed');

        return $node;
    }

    /**
     * Parses a hash literal: "{" key ":" expr ("," key ":" expr)* ","? "}".
     *
     * @return Node\ArrayNode
     *
     * @throws SyntaxError When a key is neither a string, a number, a name
     *                     nor an expression starting with a parenthesis
     */
    public function parseHashExpression()
    {
        $this->stream->expect(Token::PUNCTUATION_TYPE, '{', 'A hash element was expected');

        $node = new Node\ArrayNode();
        $first = true;
        while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, '}')) {
            if (!$first) {
                $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'A hash value must be followed by a comma');

                // trailing ,?
                if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '}')) {
                    break;
                }
            }
            $first = false;

            // a hash key can be:
            //
            //  * a number -- 12
            //  * a string -- 'a'
            //  * a name, which is equivalent to a string -- a
            //  * an expression, which must be enclosed in parentheses -- (1 + 2)
            if ($this->stream->current->test(Token::STRING_TYPE) || $this->stream->current->test(Token::NAME_TYPE) || $this->stream->current->test(Token::NUMBER_TYPE)) {
                $key = new Node\ConstantNode($this->stream->current->value);
                $this->stream->next();
            } elseif ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
                $key = $this->parseExpression();
            } else {
                $current = $this->stream->current;

                throw new SyntaxError(sprintf('A hash key must be a quoted string, a number, a name, or an expression enclosed in parentheses (unexpected token "%s" of value "%s".', $current->type, $current->value), $current->cursor, $this->stream->getExpression());
            }

            $this->stream->expect(Token::PUNCTUATION_TYPE, ':', 'A hash key must be followed by a colon (:)');
            $value = $this->parseExpression();

            $node->addElement($value, $key);
        }
        $this->stream->expect(Token::PUNCTUATION_TYPE, '}', 'An opened hash is not properly closed');

        return $node;
    }

    /**
     * Parses the accessors that may follow a node: property fetches ("a.b"),
     * method calls ("a.b(...)") and array accesses ("a[...]"), in any number.
     *
     * @param Node\Node $node The node the accessors apply to
     *
     * @return Node\Node The given node when no accessor follows, otherwise
     *                   the outermost attribute node wrapping it
     *
     * @throws SyntaxError When a "." is not followed by a valid name
     */
    public function parsePostfixExpression(Node\Node $node)
    {
        $token = $this->stream->current;
        while (Token::PUNCTUATION_TYPE === $token->type) {
            if ('.' === $token->value) {
                $this->stream->next();
                $token = $this->stream->current;
                $this->stream->next();

                if (
                    Token::NAME_TYPE !== $token->type
                    &&
                    // Operators like "not" and "matches" are valid method or property names,
                    //
                    // In other words, besides NAME_TYPE, OPERATOR_TYPE could also be parsed as a property or method.
                    // This is because operators are processed by the lexer prior to names. So "not" in "foo.not()" or "matches" in "foo.matches" will be recognized as an operator first.
                    // But in fact, "not" and "matches" in such expressions shall be parsed as method or property names.
                    //
                    // And this ONLY works if the operator consists of valid characters for a property or method name.
                    // The pattern is therefore anchored at both ends, so that an operator such as "not in"
                    // (which contains a space) is not accepted as a name.
                    //
                    // Other types, such as STRING_TYPE and NUMBER_TYPE, can't be parsed as property nor method names.
                    //
                    // As a result, if $token is NOT an operator OR $token->value is NOT a valid property or method name, an exception shall be thrown.
                    (Token::OPERATOR_TYPE !== $token->type || !preg_match('/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\z/A', $token->value))
                ) {
                    throw new SyntaxError('Expected name.', $token->cursor, $this->stream->getExpression());
                }

                $arg = new Node\ConstantNode($token->value, true);

                $arguments = new Node\ArgumentsNode();
                if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
                    $type = Node\GetAttrNode::METHOD_CALL;
                    foreach ($this->parseArguments()->nodes as $n) {
                        $arguments->addElement($n);
                    }
                } else {
                    $type = Node\GetAttrNode::PROPERTY_CALL;
                }

                $node = new Node\GetAttrNode($node, $arg, $arguments, $type);
            } elseif ('[' === $token->value) {
                $this->stream->next();
                $arg = $this->parseExpression();
                $this->stream->expect(Token::PUNCTUATION_TYPE, ']');

                $node = new Node\GetAttrNode($node, $arg, new Node\ArgumentsNode(), Node\GetAttrNode::ARRAY_CALL);
            } else {
                break;
            }

            $token = $this->stream->current;
        }

        return $node;
    }

    /**
     * Parses arguments: "(" (expr ("," expr)*)? ")".
     *
     * @return Node\Node A node built from the list of parsed argument nodes
     */
    public function parseArguments()
    {
        $args = [];
        $this->stream->expect(Token::PUNCTUATION_TYPE, '(', 'A list of arguments must begin with an opening parenthesis');
        while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ')')) {
            // every argument but the first one must be preceded by a comma
            if ([] !== $args) {
                $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'Arguments must be separated by a comma');
            }

            $args[] = $this->parseExpression();
        }
        $this->stream->expect(Token::PUNCTUATION_TYPE, ')', 'A list of arguments must be closed by a parenthesis');

        return new Node\Node($args);
    }
}
378