1""" 2Top level ``eval`` module. 3""" 4 5import tokenize 6from typing import Optional 7import warnings 8 9from pandas._libs.lib import no_default 10from pandas.util._validators import validate_bool_kwarg 11 12from pandas.core.computation.engines import ENGINES 13from pandas.core.computation.expr import PARSERS, Expr 14from pandas.core.computation.parsing import tokenize_string 15from pandas.core.computation.scope import ensure_scope 16 17from pandas.io.formats.printing import pprint_thing 18 19 20def _check_engine(engine: Optional[str]) -> str: 21 """ 22 Make sure a valid engine is passed. 23 24 Parameters 25 ---------- 26 engine : str 27 String to validate. 28 29 Raises 30 ------ 31 KeyError 32 * If an invalid engine is passed. 33 ImportError 34 * If numexpr was requested but doesn't exist. 35 36 Returns 37 ------- 38 str 39 Engine name. 40 """ 41 from pandas.core.computation.check import NUMEXPR_INSTALLED 42 43 if engine is None: 44 engine = "numexpr" if NUMEXPR_INSTALLED else "python" 45 46 if engine not in ENGINES: 47 valid_engines = list(ENGINES.keys()) 48 raise KeyError( 49 f"Invalid engine '{engine}' passed, valid engines are {valid_engines}" 50 ) 51 52 # TODO: validate this in a more general way (thinking of future engines 53 # that won't necessarily be import-able) 54 # Could potentially be done on engine instantiation 55 if engine == "numexpr" and not NUMEXPR_INSTALLED: 56 raise ImportError( 57 "'numexpr' is not installed or an unsupported version. Cannot use " 58 "engine='numexpr' for query/eval if 'numexpr' is not installed" 59 ) 60 61 return engine 62 63 64def _check_parser(parser: str): 65 """ 66 Make sure a valid parser is passed. 67 68 Parameters 69 ---------- 70 parser : str 71 72 Raises 73 ------ 74 KeyError 75 * If an invalid parser is passed 76 """ 77 if parser not in PARSERS: 78 raise KeyError( 79 f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}" 80 ) 81 82 83def _check_resolvers(resolvers): 84 if resolvers is not None: 85 for resolver in resolvers: 86 if not hasattr(resolver, "__getitem__"): 87 name = type(resolver).__name__ 88 raise TypeError( 89 f"Resolver of type '{name}' does not " 90 "implement the __getitem__ method" 91 ) 92 93 94def _check_expression(expr): 95 """ 96 Make sure an expression is not an empty string 97 98 Parameters 99 ---------- 100 expr : object 101 An object that can be converted to a string 102 103 Raises 104 ------ 105 ValueError 106 * If expr is an empty string 107 """ 108 if not expr: 109 raise ValueError("expr cannot be an empty string") 110 111 112def _convert_expression(expr) -> str: 113 """ 114 Convert an object to an expression. 115 116 This function converts an object to an expression (a unicode string) and 117 checks to make sure it isn't empty after conversion. This is used to 118 convert operators to their string representation for recursive calls to 119 :func:`~pandas.eval`. 120 121 Parameters 122 ---------- 123 expr : object 124 The object to be converted to a string. 125 126 Returns 127 ------- 128 str 129 The string representation of an object. 130 131 Raises 132 ------ 133 ValueError 134 * If the expression is empty. 135 """ 136 s = pprint_thing(expr) 137 _check_expression(s) 138 return s 139 140 141def _check_for_locals(expr: str, stack_level: int, parser: str): 142 143 at_top_of_stack = stack_level == 0 144 not_pandas_parser = parser != "pandas" 145 146 if not_pandas_parser: 147 msg = "The '@' prefix is only supported by the pandas parser" 148 elif at_top_of_stack: 149 msg = ( 150 "The '@' prefix is not allowed in top-level eval calls.\n" 151 "please refer to your variables by name without the '@' prefix." 152 ) 153 154 if at_top_of_stack or not_pandas_parser: 155 for toknum, tokval in tokenize_string(expr): 156 if toknum == tokenize.OP and tokval == "@": 157 raise SyntaxError(msg) 158 159 160def eval( 161 expr, 162 parser="pandas", 163 engine: Optional[str] = None, 164 truediv=no_default, 165 local_dict=None, 166 global_dict=None, 167 resolvers=(), 168 level=0, 169 target=None, 170 inplace=False, 171): 172 """ 173 Evaluate a Python expression as a string using various backends. 174 175 The following arithmetic operations are supported: ``+``, ``-``, ``*``, 176 ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following 177 boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not). 178 Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`, 179 :keyword:`or`, and :keyword:`not` with the same semantics as the 180 corresponding bitwise operators. :class:`~pandas.Series` and 181 :class:`~pandas.DataFrame` objects are supported and behave as they would 182 with plain ol' Python evaluation. 183 184 Parameters 185 ---------- 186 expr : str 187 The expression to evaluate. This string cannot contain any Python 188 `statements 189 <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__, 190 only Python `expressions 191 <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__. 192 parser : {'pandas', 'python'}, default 'pandas' 193 The parser to use to construct the syntax tree from the expression. The 194 default of ``'pandas'`` parses code slightly different than standard 195 Python. Alternatively, you can parse an expression using the 196 ``'python'`` parser to retain strict Python semantics. See the 197 :ref:`enhancing performance <enhancingperf.eval>` documentation for 198 more details. 199 engine : {'python', 'numexpr'}, default 'numexpr' 200 201 The engine used to evaluate the expression. Supported engines are 202 203 - None : tries to use ``numexpr``, falls back to ``python`` 204 - ``'numexpr'``: This default engine evaluates pandas objects using 205 numexpr for large speed ups in complex expressions 206 with large frames. 207 - ``'python'``: Performs operations as if you had ``eval``'d in top 208 level python. This engine is generally not that useful. 209 210 More backends may be available in the future. 211 212 truediv : bool, optional 213 Whether to use true division, like in Python >= 3. 214 215 .. deprecated:: 1.0.0 216 217 local_dict : dict or None, optional 218 A dictionary of local variables, taken from locals() by default. 219 global_dict : dict or None, optional 220 A dictionary of global variables, taken from globals() by default. 221 resolvers : list of dict-like or None, optional 222 A list of objects implementing the ``__getitem__`` special method that 223 you can use to inject an additional collection of namespaces to use for 224 variable lookup. For example, this is used in the 225 :meth:`~DataFrame.query` method to inject the 226 ``DataFrame.index`` and ``DataFrame.columns`` 227 variables that refer to their respective :class:`~pandas.DataFrame` 228 instance attributes. 229 level : int, optional 230 The number of prior stack frames to traverse and add to the current 231 scope. Most users will **not** need to change this parameter. 232 target : object, optional, default None 233 This is the target object for assignment. It is used when there is 234 variable assignment in the expression. If so, then `target` must 235 support item assignment with string keys, and if a copy is being 236 returned, it must also support `.copy()`. 237 inplace : bool, default False 238 If `target` is provided, and the expression mutates `target`, whether 239 to modify `target` inplace. Otherwise, return a copy of `target` with 240 the mutation. 241 242 Returns 243 ------- 244 ndarray, numeric scalar, DataFrame, Series, or None 245 The completion value of evaluating the given code or None if ``inplace=True``. 246 247 Raises 248 ------ 249 ValueError 250 There are many instances where such an error can be raised: 251 252 - `target=None`, but the expression is multiline. 253 - The expression is multiline, but not all them have item assignment. 254 An example of such an arrangement is this: 255 256 a = b + 1 257 a + 2 258 259 Here, there are expressions on different lines, making it multiline, 260 but the last line has no variable assigned to the output of `a + 2`. 261 - `inplace=True`, but the expression is missing item assignment. 262 - Item assignment is provided, but the `target` does not support 263 string item assignment. 264 - Item assignment is provided and `inplace=False`, but the `target` 265 does not support the `.copy()` method 266 267 See Also 268 -------- 269 DataFrame.query : Evaluates a boolean expression to query the columns 270 of a frame. 271 DataFrame.eval : Evaluate a string describing operations on 272 DataFrame columns. 273 274 Notes 275 ----- 276 The ``dtype`` of any objects involved in an arithmetic ``%`` operation are 277 recursively cast to ``float64``. 278 279 See the :ref:`enhancing performance <enhancingperf.eval>` documentation for 280 more details. 281 282 Examples 283 -------- 284 >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]}) 285 >>> df 286 animal age 287 0 dog 10 288 1 pig 20 289 290 We can add a new column using ``pd.eval``: 291 292 >>> pd.eval("double_age = df.age * 2", target=df) 293 animal age double_age 294 0 dog 10 20 295 1 pig 20 40 296 """ 297 inplace = validate_bool_kwarg(inplace, "inplace") 298 299 if truediv is not no_default: 300 warnings.warn( 301 ( 302 "The `truediv` parameter in pd.eval is deprecated and " 303 "will be removed in a future version." 304 ), 305 FutureWarning, 306 stacklevel=2, 307 ) 308 309 if isinstance(expr, str): 310 _check_expression(expr) 311 exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""] 312 else: 313 exprs = [expr] 314 multi_line = len(exprs) > 1 315 316 if multi_line and target is None: 317 raise ValueError( 318 "multi-line expressions are only valid in the " 319 "context of data, use DataFrame.eval" 320 ) 321 engine = _check_engine(engine) 322 _check_parser(parser) 323 _check_resolvers(resolvers) 324 325 ret = None 326 first_expr = True 327 target_modified = False 328 329 for expr in exprs: 330 expr = _convert_expression(expr) 331 _check_for_locals(expr, level, parser) 332 333 # get our (possibly passed-in) scope 334 env = ensure_scope( 335 level + 1, 336 global_dict=global_dict, 337 local_dict=local_dict, 338 resolvers=resolvers, 339 target=target, 340 ) 341 342 parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) 343 344 # construct the engine and evaluate the parsed expression 345 eng = ENGINES[engine] 346 eng_inst = eng(parsed_expr) 347 ret = eng_inst.evaluate() 348 349 if parsed_expr.assigner is None: 350 if multi_line: 351 raise ValueError( 352 "Multi-line expressions are only valid " 353 "if all expressions contain an assignment" 354 ) 355 elif inplace: 356 raise ValueError("Cannot operate inplace if there is no assignment") 357 358 # assign if needed 359 assigner = parsed_expr.assigner 360 if env.target is not None and assigner is not None: 361 target_modified = True 362 363 # if returning a copy, copy only on the first assignment 364 if not inplace and first_expr: 365 try: 366 target = env.target.copy() 367 except AttributeError as err: 368 raise ValueError("Cannot return a copy of the target") from err 369 else: 370 target = env.target 371 372 # TypeError is most commonly raised (e.g. int, list), but you 373 # get IndexError if you try to do this assignment on np.ndarray. 374 # we will ignore numpy warnings here; e.g. if trying 375 # to use a non-numeric indexer 376 try: 377 with warnings.catch_warnings(record=True): 378 # TODO: Filter the warnings we actually care about here. 379 target[assigner] = ret 380 except (TypeError, IndexError) as err: 381 raise ValueError("Cannot assign expression output to target") from err 382 383 if not resolvers: 384 resolvers = ({assigner: ret},) 385 else: 386 # existing resolver needs updated to handle 387 # case of mutating existing column in copy 388 for resolver in resolvers: 389 if assigner in resolver: 390 resolver[assigner] = ret 391 break 392 else: 393 resolvers += ({assigner: ret},) 394 395 ret = None 396 first_expr = False 397 398 # We want to exclude `inplace=None` as being False. 399 if inplace is False: 400 return target if target_modified else ret 401