1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3# You can obtain one at http://mozilla.org/MPL/2.0/.
4
5from __future__ import absolute_import, print_function
6
7import re
8import sys
9import traceback
10
11import six
12
13__all__ = ["parse", "ParseError", "ExpressionParser"]
14
15# expr.py
16# from:
17# http://k0s.org/mozilla/hg/expressionparser
18# http://hg.mozilla.org/users/tmielczarek_mozilla.com/expressionparser
19
20# Implements a top-down parser/evaluator for simple boolean expressions.
21# ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm
22#
23# Rough grammar:
24# expr := literal
25#       | '(' expr ')'
26#       | expr '&&' expr
27#       | expr '||' expr
28#       | expr '==' expr
29#       | expr '!=' expr
30#       | expr '<' expr
31#       | expr '>' expr
32#       | expr '<=' expr
33#       | expr '>=' expr
34# literal := BOOL
35#          | INT
36#          | STRING
37#          | IDENT
38# BOOL   := true|false
39# INT    := [0-9]+
40# STRING := "[^"]*"
41# IDENT  := [A-Za-z_]\w*
42
43# Identifiers take their values from a mapping dictionary passed as the second
44# argument.
45
46# Glossary (see above URL for details):
47# - nud: null denotation
48# - led: left detonation
49# - lbp: left binding power
50# - rbp: right binding power
51
52
53class ident_token(object):
54    def __init__(self, scanner, value):
55        self.value = value
56
57    def nud(self, parser):
58        # identifiers take their value from the value mappings passed
59        # to the parser
60        return parser.value(self.value)
61
62
63class literal_token(object):
64    def __init__(self, scanner, value):
65        self.value = value
66
67    def nud(self, parser):
68        return self.value
69
70
71class eq_op_token(object):
72    "=="
73
74    def led(self, parser, left):
75        return left == parser.expression(self.lbp)
76
77
78class neq_op_token(object):
79    "!="
80
81    def led(self, parser, left):
82        return left != parser.expression(self.lbp)
83
84
85class lt_op_token(object):
86    "<"
87
88    def led(self, parser, left):
89        return left < parser.expression(self.lbp)
90
91
92class gt_op_token(object):
93    ">"
94
95    def led(self, parser, left):
96        return left > parser.expression(self.lbp)
97
98
99class le_op_token(object):
100    "<="
101
102    def led(self, parser, left):
103        return left <= parser.expression(self.lbp)
104
105
106class ge_op_token(object):
107    ">="
108
109    def led(self, parser, left):
110        return left >= parser.expression(self.lbp)
111
112
113class not_op_token(object):
114    "!"
115
116    def nud(self, parser):
117        return not parser.expression(100)
118
119
120class and_op_token(object):
121    "&&"
122
123    def led(self, parser, left):
124        right = parser.expression(self.lbp)
125        return left and right
126
127
128class or_op_token(object):
129    "||"
130
131    def led(self, parser, left):
132        right = parser.expression(self.lbp)
133        return left or right
134
135
136class lparen_token(object):
137    "("
138
139    def nud(self, parser):
140        expr = parser.expression()
141        parser.advance(rparen_token)
142        return expr
143
144
145class rparen_token(object):
146    ")"
147
148
149class end_token(object):
150    """always ends parsing"""
151
152
153# derived literal tokens
154
155
156class bool_token(literal_token):
157    def __init__(self, scanner, value):
158        value = {"true": True, "false": False}[value]
159        literal_token.__init__(self, scanner, value)
160
161
162class int_token(literal_token):
163    def __init__(self, scanner, value):
164        literal_token.__init__(self, scanner, int(value))
165
166
167class string_token(literal_token):
168    def __init__(self, scanner, value):
169        literal_token.__init__(self, scanner, value[1:-1])
170
171
172precedence = [
173    (end_token, rparen_token),
174    (or_op_token,),
175    (and_op_token,),
176    (lt_op_token, gt_op_token, le_op_token, ge_op_token, eq_op_token, neq_op_token),
177    (lparen_token,),
178]
179for index, rank in enumerate(precedence):
180    for token in rank:
181        token.lbp = index  # lbp = lowest left binding power
182
183
184class ParseError(Exception):
185    """error parsing conditional expression"""
186
187
188class ExpressionParser(object):
189    """
190    A parser for a simple expression language.
191
192    The expression language can be described as follows::
193
194        EXPRESSION ::= LITERAL | '(' EXPRESSION ')' | '!' EXPRESSION | EXPRESSION OP EXPRESSION
195        OP ::= '==' | '!=' | '<' | '>' | '<=' | '>=' | '&&' | '||'
196        LITERAL ::= BOOL | INT | IDENT | STRING
197        BOOL ::= 'true' | 'false'
198        INT ::= [0-9]+
199        IDENT ::= [a-zA-Z_]\w*
200        STRING ::= '"' [^\"] '"' | ''' [^\'] '''
201
202    At its core, expressions consist of booleans, integers, identifiers and.
203    strings. Booleans are one of *true* or *false*. Integers are a series
204    of digits. Identifiers are a series of English letters and underscores.
205    Strings are a pair of matching quote characters (single or double) with
206    zero or more characters inside.
207
208    Expressions can be combined with operators: the equals (==) and not
209    equals (!=) operators compare two expressions and produce a boolean. The
210    and (&&) and or (||) operators take two expressions and produce the logical
211    AND or OR value of them, respectively. An expression can also be prefixed
212    with the not (!) operator, which produces its logical negation.
213
214    Finally, any expression may be contained within parentheses for grouping.
215
216    Identifiers take their values from the mapping provided.
217    """
218
219    scanner = None
220
221    def __init__(self, text, valuemapping, strict=False):
222        """
223        Initialize the parser
224        :param text: The expression to parse as a string.
225        :param valuemapping: A dict mapping identifier names to values.
226        :param strict: If true, referencing an identifier that was not
227                       provided in :valuemapping: will raise an error.
228        """
229        self.text = text
230        self.valuemapping = valuemapping
231        self.strict = strict
232
233    def _tokenize(self):
234        """
235        Lex the input text into tokens and yield them in sequence.
236        """
237        if not ExpressionParser.scanner:
238            ExpressionParser.scanner = re.Scanner(
239                [
240                    # Note: keep these in sync with the class docstring above.
241                    (r"true|false", bool_token),
242                    (r"[a-zA-Z_]\w*", ident_token),
243                    (r"[0-9]+", int_token),
244                    (r'("[^"]*")|(\'[^\']*\')', string_token),
245                    (r"==", eq_op_token()),
246                    (r"!=", neq_op_token()),
247                    (r"<=", le_op_token()),
248                    (r">=", ge_op_token()),
249                    (r"<", lt_op_token()),
250                    (r">", gt_op_token()),
251                    (r"\|\|", or_op_token()),
252                    (r"!", not_op_token()),
253                    (r"&&", and_op_token()),
254                    (r"\(", lparen_token()),
255                    (r"\)", rparen_token()),
256                    (r"\s+", None),  # skip whitespace
257                ]
258            )
259        tokens, remainder = ExpressionParser.scanner.scan(self.text)
260        for t in tokens:
261            yield t
262        yield end_token()
263
264    def value(self, ident):
265        """
266        Look up the value of |ident| in the value mapping passed in the
267        constructor.
268        """
269        if self.strict:
270            return self.valuemapping[ident]
271        else:
272            return self.valuemapping.get(ident, "")
273
274    def advance(self, expected):
275        """
276        Assert that the next token is an instance of |expected|, and advance
277        to the next token.
278        """
279        if not isinstance(self.token, expected):
280            raise Exception("Unexpected token!")
281        self.token = six.next(self.iter)
282
283    def expression(self, rbp=0):
284        """
285        Parse and return the value of an expression until a token with
286        right binding power greater than rbp is encountered.
287        """
288        t = self.token
289        self.token = six.next(self.iter)
290        left = t.nud(self)
291        while rbp < self.token.lbp:
292            t = self.token
293            self.token = six.next(self.iter)
294            left = t.led(self, left)
295        return left
296
297    def parse(self):
298        """
299        Parse and return the value of the expression in the text
300        passed to the constructor. Raises a ParseError if the expression
301        could not be parsed.
302        """
303        try:
304            self.iter = self._tokenize()
305            self.token = six.next(self.iter)
306            return self.expression()
307        except Exception:
308            extype, ex, tb = sys.exc_info()
309            formatted = "".join(traceback.format_exception_only(extype, ex))
310            six.reraise(
311                ParseError,
312                ParseError(
313                    "could not parse: %s\nexception: %svariables: %s"
314                    % (self.text, formatted, self.valuemapping)
315                ),
316                tb,
317            )
318
319    __call__ = parse
320
321
322def parse(text, **values):
323    """
324    Parse and evaluate a boolean expression.
325    :param text: The expression to parse, as a string.
326    :param values: A dict containing a name to value mapping for identifiers
327                   referenced in *text*.
328    :rtype: the final value of the expression.
329    :raises: :py:exc::ParseError: will be raised if parsing fails.
330    """
331    return ExpressionParser(text, values).parse()
332