1"""
2Top level ``eval`` module.
3"""
4
5import tokenize
6from typing import Optional
7import warnings
8
9from pandas._libs.lib import no_default
10from pandas.util._validators import validate_bool_kwarg
11
12from pandas.core.computation.engines import ENGINES
13from pandas.core.computation.expr import PARSERS, Expr
14from pandas.core.computation.parsing import tokenize_string
15from pandas.core.computation.scope import ensure_scope
16
17from pandas.io.formats.printing import pprint_thing
18
19
20def _check_engine(engine: Optional[str]) -> str:
21    """
22    Make sure a valid engine is passed.
23
24    Parameters
25    ----------
26    engine : str
27        String to validate.
28
29    Raises
30    ------
31    KeyError
32      * If an invalid engine is passed.
33    ImportError
34      * If numexpr was requested but doesn't exist.
35
36    Returns
37    -------
38    str
39        Engine name.
40    """
41    from pandas.core.computation.check import NUMEXPR_INSTALLED
42
43    if engine is None:
44        engine = "numexpr" if NUMEXPR_INSTALLED else "python"
45
46    if engine not in ENGINES:
47        valid_engines = list(ENGINES.keys())
48        raise KeyError(
49            f"Invalid engine '{engine}' passed, valid engines are {valid_engines}"
50        )
51
52    # TODO: validate this in a more general way (thinking of future engines
53    # that won't necessarily be import-able)
54    # Could potentially be done on engine instantiation
55    if engine == "numexpr" and not NUMEXPR_INSTALLED:
56        raise ImportError(
57            "'numexpr' is not installed or an unsupported version. Cannot use "
58            "engine='numexpr' for query/eval if 'numexpr' is not installed"
59        )
60
61    return engine
62
63
64def _check_parser(parser: str):
65    """
66    Make sure a valid parser is passed.
67
68    Parameters
69    ----------
70    parser : str
71
72    Raises
73    ------
74    KeyError
75      * If an invalid parser is passed
76    """
77    if parser not in PARSERS:
78        raise KeyError(
79            f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}"
80        )
81
82
83def _check_resolvers(resolvers):
84    if resolvers is not None:
85        for resolver in resolvers:
86            if not hasattr(resolver, "__getitem__"):
87                name = type(resolver).__name__
88                raise TypeError(
89                    f"Resolver of type '{name}' does not "
90                    "implement the __getitem__ method"
91                )
92
93
94def _check_expression(expr):
95    """
96    Make sure an expression is not an empty string
97
98    Parameters
99    ----------
100    expr : object
101        An object that can be converted to a string
102
103    Raises
104    ------
105    ValueError
106      * If expr is an empty string
107    """
108    if not expr:
109        raise ValueError("expr cannot be an empty string")
110
111
def _convert_expression(expr) -> str:
    """
    Turn an object into a non-empty expression string.

    The object is rendered with ``pprint_thing`` and the resulting string is
    then validated so that an empty expression is rejected. This is used to
    convert operators to their string representation for recursive calls to
    :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    str
        The string representation of an object.

    Raises
    ------
    ValueError
      * If the expression is empty.
    """
    as_text = pprint_thing(expr)
    # validate after conversion: the rendered text is what gets parsed
    _check_expression(as_text)
    return as_text
139
140
141def _check_for_locals(expr: str, stack_level: int, parser: str):
142
143    at_top_of_stack = stack_level == 0
144    not_pandas_parser = parser != "pandas"
145
146    if not_pandas_parser:
147        msg = "The '@' prefix is only supported by the pandas parser"
148    elif at_top_of_stack:
149        msg = (
150            "The '@' prefix is not allowed in top-level eval calls.\n"
151            "please refer to your variables by name without the '@' prefix."
152        )
153
154    if at_top_of_stack or not_pandas_parser:
155        for toknum, tokval in tokenize_string(expr):
156            if toknum == tokenize.OP and tokval == "@":
157                raise SyntaxError(msg)
158
159
def eval(
    expr,
    parser="pandas",
    engine: Optional[str] = None,
    truediv=no_default,
    local_dict=None,
    global_dict=None,
    resolvers=(),
    level=0,
    target=None,
    inplace=False,
):
    """
    Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators.  :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
    parser : {'pandas', 'python'}, default 'pandas'
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly different than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics.  See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : {'python', 'numexpr'}, optional, default None
        The engine used to evaluate the expression. Supported engines are

        - None         : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'``: This engine evaluates pandas objects using
                         numexpr for large speed ups in complex expressions
                         with large frames.
        - ``'python'``: Performs operations as if you had ``eval``'d in top
                        level python. This engine is generally not that useful.

        More backends may be available in the future.

    truediv : bool, optional
        Whether to use true division, like in Python >= 3.

        .. deprecated:: 1.0.0

    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes. The passed container itself is never extended,
        although an individual resolver that already holds an assigned name
        is updated with the new value.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series, or None
        The completion value of evaluating the given code or None if
        ``inplace=True``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `expr` is an empty string or contains only whitespace.
        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all them have item assignment.
          An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    DataFrame.query : Evaluates a boolean expression to query the columns
            of a frame.
    DataFrame.eval : Evaluate a string describing operations on
            DataFrame columns.

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.

    Examples
    --------
    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
    >>> df
      animal  age
    0    dog   10
    1    pig   20

    We can add a new column using ``pd.eval``:

    >>> pd.eval("double_age = df.age * 2", target=df)
      animal  age  double_age
    0    dog   10          20
    1    pig   20          40
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    if truediv is not no_default:
        warnings.warn(
            (
                "The `truediv` parameter in pd.eval is deprecated and "
                "will be removed in a future version."
            ),
            FutureWarning,
            stacklevel=2,
        )

    if isinstance(expr, str):
        _check_expression(expr)
        # every non-blank line is evaluated as its own expression
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
        if not exprs:
            # A whitespace-only string passes the emptiness check above but
            # yields nothing to evaluate; fail loudly instead of silently
            # returning None.
            raise ValueError("expr cannot be an empty string")
    else:
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError(
            "multi-line expressions are only valid in the "
            "context of data, use DataFrame.eval"
        )
    engine = _check_engine(engine)
    _check_parser(parser)
    _check_resolvers(resolvers)

    ret = None
    first_expr = True
    target_modified = False

    for expr in exprs:
        expr = _convert_expression(expr)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        env = ensure_scope(
            level + 1,
            global_dict=global_dict,
            local_dict=local_dict,
            resolvers=resolvers,
            target=target,
        )

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)

        # construct the engine and evaluate the parsed expression
        eng = ENGINES[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError(
                    "Multi-line expressions are only valid "
                    "if all expressions contain an assignment"
                )
            elif inplace:
                raise ValueError("Cannot operate inplace if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target.copy()
                except AttributeError as err:
                    raise ValueError("Cannot return a copy of the target") from err
            else:
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                with warnings.catch_warnings(record=True):
                    # TODO: Filter the warnings we actually care about here.
                    target[assigner] = ret
            except (TypeError, IndexError) as err:
                raise ValueError("Cannot assign expression output to target") from err

            # make the freshly assigned name visible to the following lines
            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    # Build a new tuple instead of using ``+=``: on a
                    # caller-supplied list ``+=`` extends it in place and
                    # would leak this internal mapping to the caller.
                    resolvers = tuple(resolvers) + ({assigner: ret},)

            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    if inplace is False:
        return target if target_modified else ret
401