1# This Source Code Form is subject to the terms of the Mozilla Public 2# License, v. 2.0. If a copy of the MPL was not distributed with this file, 3# You can obtain one at http://mozilla.org/MPL/2.0/. 4 5from __future__ import absolute_import, print_function 6 7import re 8import sys 9import traceback 10 11import six 12 13__all__ = ["parse", "ParseError", "ExpressionParser"] 14 15# expr.py 16# from: 17# http://k0s.org/mozilla/hg/expressionparser 18# http://hg.mozilla.org/users/tmielczarek_mozilla.com/expressionparser 19 20# Implements a top-down parser/evaluator for simple boolean expressions. 21# ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm 22# 23# Rough grammar: 24# expr := literal 25# | '(' expr ')' 26# | expr '&&' expr 27# | expr '||' expr 28# | expr '==' expr 29# | expr '!=' expr 30# | expr '<' expr 31# | expr '>' expr 32# | expr '<=' expr 33# | expr '>=' expr 34# literal := BOOL 35# | INT 36# | STRING 37# | IDENT 38# BOOL := true|false 39# INT := [0-9]+ 40# STRING := "[^"]*" 41# IDENT := [A-Za-z_]\w* 42 43# Identifiers take their values from a mapping dictionary passed as the second 44# argument. 45 46# Glossary (see above URL for details): 47# - nud: null denotation 48# - led: left detonation 49# - lbp: left binding power 50# - rbp: right binding power 51 52 53class ident_token(object): 54 def __init__(self, scanner, value): 55 self.value = value 56 57 def nud(self, parser): 58 # identifiers take their value from the value mappings passed 59 # to the parser 60 return parser.value(self.value) 61 62 63class literal_token(object): 64 def __init__(self, scanner, value): 65 self.value = value 66 67 def nud(self, parser): 68 return self.value 69 70 71class eq_op_token(object): 72 "==" 73 74 def led(self, parser, left): 75 return left == parser.expression(self.lbp) 76 77 78class neq_op_token(object): 79 "!=" 80 81 def led(self, parser, left): 82 return left != parser.expression(self.lbp) 83 84 85class lt_op_token(object): 86 "<" 87 88 def led(self, parser, left): 89 return left < parser.expression(self.lbp) 90 91 92class gt_op_token(object): 93 ">" 94 95 def led(self, parser, left): 96 return left > parser.expression(self.lbp) 97 98 99class le_op_token(object): 100 "<=" 101 102 def led(self, parser, left): 103 return left <= parser.expression(self.lbp) 104 105 106class ge_op_token(object): 107 ">=" 108 109 def led(self, parser, left): 110 return left >= parser.expression(self.lbp) 111 112 113class not_op_token(object): 114 "!" 115 116 def nud(self, parser): 117 return not parser.expression(100) 118 119 120class and_op_token(object): 121 "&&" 122 123 def led(self, parser, left): 124 right = parser.expression(self.lbp) 125 return left and right 126 127 128class or_op_token(object): 129 "||" 130 131 def led(self, parser, left): 132 right = parser.expression(self.lbp) 133 return left or right 134 135 136class lparen_token(object): 137 "(" 138 139 def nud(self, parser): 140 expr = parser.expression() 141 parser.advance(rparen_token) 142 return expr 143 144 145class rparen_token(object): 146 ")" 147 148 149class end_token(object): 150 """always ends parsing""" 151 152 153# derived literal tokens 154 155 156class bool_token(literal_token): 157 def __init__(self, scanner, value): 158 value = {"true": True, "false": False}[value] 159 literal_token.__init__(self, scanner, value) 160 161 162class int_token(literal_token): 163 def __init__(self, scanner, value): 164 literal_token.__init__(self, scanner, int(value)) 165 166 167class string_token(literal_token): 168 def __init__(self, scanner, value): 169 literal_token.__init__(self, scanner, value[1:-1]) 170 171 172precedence = [ 173 (end_token, rparen_token), 174 (or_op_token,), 175 (and_op_token,), 176 (lt_op_token, gt_op_token, le_op_token, ge_op_token, eq_op_token, neq_op_token), 177 (lparen_token,), 178] 179for index, rank in enumerate(precedence): 180 for token in rank: 181 token.lbp = index # lbp = lowest left binding power 182 183 184class ParseError(Exception): 185 """error parsing conditional expression""" 186 187 188class ExpressionParser(object): 189 """ 190 A parser for a simple expression language. 191 192 The expression language can be described as follows:: 193 194 EXPRESSION ::= LITERAL | '(' EXPRESSION ')' | '!' EXPRESSION | EXPRESSION OP EXPRESSION 195 OP ::= '==' | '!=' | '<' | '>' | '<=' | '>=' | '&&' | '||' 196 LITERAL ::= BOOL | INT | IDENT | STRING 197 BOOL ::= 'true' | 'false' 198 INT ::= [0-9]+ 199 IDENT ::= [a-zA-Z_]\w* 200 STRING ::= '"' [^\"] '"' | ''' [^\'] ''' 201 202 At its core, expressions consist of booleans, integers, identifiers and. 203 strings. Booleans are one of *true* or *false*. Integers are a series 204 of digits. Identifiers are a series of English letters and underscores. 205 Strings are a pair of matching quote characters (single or double) with 206 zero or more characters inside. 207 208 Expressions can be combined with operators: the equals (==) and not 209 equals (!=) operators compare two expressions and produce a boolean. The 210 and (&&) and or (||) operators take two expressions and produce the logical 211 AND or OR value of them, respectively. An expression can also be prefixed 212 with the not (!) operator, which produces its logical negation. 213 214 Finally, any expression may be contained within parentheses for grouping. 215 216 Identifiers take their values from the mapping provided. 217 """ 218 219 scanner = None 220 221 def __init__(self, text, valuemapping, strict=False): 222 """ 223 Initialize the parser 224 :param text: The expression to parse as a string. 225 :param valuemapping: A dict mapping identifier names to values. 226 :param strict: If true, referencing an identifier that was not 227 provided in :valuemapping: will raise an error. 228 """ 229 self.text = text 230 self.valuemapping = valuemapping 231 self.strict = strict 232 233 def _tokenize(self): 234 """ 235 Lex the input text into tokens and yield them in sequence. 236 """ 237 if not ExpressionParser.scanner: 238 ExpressionParser.scanner = re.Scanner( 239 [ 240 # Note: keep these in sync with the class docstring above. 241 (r"true|false", bool_token), 242 (r"[a-zA-Z_]\w*", ident_token), 243 (r"[0-9]+", int_token), 244 (r'("[^"]*")|(\'[^\']*\')', string_token), 245 (r"==", eq_op_token()), 246 (r"!=", neq_op_token()), 247 (r"<=", le_op_token()), 248 (r">=", ge_op_token()), 249 (r"<", lt_op_token()), 250 (r">", gt_op_token()), 251 (r"\|\|", or_op_token()), 252 (r"!", not_op_token()), 253 (r"&&", and_op_token()), 254 (r"\(", lparen_token()), 255 (r"\)", rparen_token()), 256 (r"\s+", None), # skip whitespace 257 ] 258 ) 259 tokens, remainder = ExpressionParser.scanner.scan(self.text) 260 for t in tokens: 261 yield t 262 yield end_token() 263 264 def value(self, ident): 265 """ 266 Look up the value of |ident| in the value mapping passed in the 267 constructor. 268 """ 269 if self.strict: 270 return self.valuemapping[ident] 271 else: 272 return self.valuemapping.get(ident, "") 273 274 def advance(self, expected): 275 """ 276 Assert that the next token is an instance of |expected|, and advance 277 to the next token. 278 """ 279 if not isinstance(self.token, expected): 280 raise Exception("Unexpected token!") 281 self.token = six.next(self.iter) 282 283 def expression(self, rbp=0): 284 """ 285 Parse and return the value of an expression until a token with 286 right binding power greater than rbp is encountered. 287 """ 288 t = self.token 289 self.token = six.next(self.iter) 290 left = t.nud(self) 291 while rbp < self.token.lbp: 292 t = self.token 293 self.token = six.next(self.iter) 294 left = t.led(self, left) 295 return left 296 297 def parse(self): 298 """ 299 Parse and return the value of the expression in the text 300 passed to the constructor. Raises a ParseError if the expression 301 could not be parsed. 302 """ 303 try: 304 self.iter = self._tokenize() 305 self.token = six.next(self.iter) 306 return self.expression() 307 except Exception: 308 extype, ex, tb = sys.exc_info() 309 formatted = "".join(traceback.format_exception_only(extype, ex)) 310 six.reraise( 311 ParseError, 312 ParseError( 313 "could not parse: %s\nexception: %svariables: %s" 314 % (self.text, formatted, self.valuemapping) 315 ), 316 tb, 317 ) 318 319 __call__ = parse 320 321 322def parse(text, **values): 323 """ 324 Parse and evaluate a boolean expression. 325 :param text: The expression to parse, as a string. 326 :param values: A dict containing a name to value mapping for identifiers 327 referenced in *text*. 328 :rtype: the final value of the expression. 329 :raises: :py:exc::ParseError: will be raised if parsing fails. 330 """ 331 return ExpressionParser(text, values).parse() 332