1"""Objects modelling VM state. (Frames etc.)."""
2
3import collections
4import itertools
5import logging
6from typing import Collection, Dict, Optional
7
8from pytype import abstract
9from pytype import class_mixin
10from pytype import compare
11from pytype import metrics
12from pytype import mixin
13from pytype import utils
14from pytype.typegraph import cfg
15
16log = logging.getLogger(__name__)
17
18# A special constant, returned by split_conditions() to signal that the
19# condition cannot be satisfied with any known bindings.
20UNSATISFIABLE = object()
21
22
23class FrameState(utils.VirtualMachineWeakrefMixin):
24  """Immutable state object, for attaching to opcodes."""
25
26  __slots__ = ["block_stack", "data_stack", "node", "exception", "why"]
27
28  def __init__(self, data_stack, block_stack, node, vm, exception, why):
29    super().__init__(vm)
30    self.data_stack = data_stack
31    self.block_stack = block_stack
32    self.node = node
33    self.exception = exception
34    self.why = why
35
36  @classmethod
37  def init(cls, node, vm):
38    return FrameState((), (), node, vm, False, None)
39
40  def __setattribute__(self):
41    raise AttributeError("States are immutable.")
42
43  def set_why(self, why):
44    return FrameState(self.data_stack,
45                      self.block_stack,
46                      self.node,
47                      self.vm,
48                      self.exception,
49                      why)
50
51  def push(self, *values):
52    """Push value(s) onto the value stack."""
53    return FrameState(self.data_stack + tuple(values),
54                      self.block_stack,
55                      self.node,
56                      self.vm,
57                      self.exception,
58                      self.why)
59
60  def peek(self, n):
61    """Get a value `n` entries down in the stack, without changing the stack."""
62    return self.data_stack[-n]
63
64  def top(self):
65    return self.data_stack[-1]
66
67  def topn(self, n):
68    if n > 0:
69      return self.data_stack[-n:]
70    else:
71      return ()
72
73  def pop(self):
74    """Pop a value from the value stack."""
75    value = self.data_stack[-1]
76    return FrameState(self.data_stack[:-1],
77                      self.block_stack,
78                      self.node,
79                      self.vm,
80                      self.exception,
81                      self.why), value
82
83  def pop_and_discard(self):
84    """Pop a value from the value stack and discard it."""
85    return FrameState(self.data_stack[:-1],
86                      self.block_stack,
87                      self.node,
88                      self.vm,
89                      self.exception,
90                      self.why)
91
92  def popn(self, n):
93    """Return n values, ordered oldest-to-newest."""
94    if not n:
95      # Not an error: E.g. function calls with no parameters pop zero items
96      return self, ()
97    if len(self.data_stack) < n:
98      raise IndexError("Trying to pop %d values from stack of size %d" %
99                       (n, len(self.data_stack)))
100    values = self.data_stack[-n:]
101    return FrameState(self.data_stack[:-n],
102                      self.block_stack,
103                      self.node,
104                      self.vm,
105                      self.exception,
106                      self.why), values
107
108  def push_block(self, block):
109    """Push a block on to the block stack."""
110    return FrameState(self.data_stack,
111                      self.block_stack + (block,),
112                      self.node,
113                      self.vm,
114                      self.exception,
115                      self.why)
116
117  def pop_block(self):
118    """Pop a block from the block stack."""
119    block = self.block_stack[-1]
120    return FrameState(self.data_stack,
121                      self.block_stack[:-1],
122                      self.node,
123                      self.vm,
124                      self.exception,
125                      self.why), block
126
127  def change_cfg_node(self, node):
128    assert isinstance(node, cfg.CFGNode)
129    if self.node is node:
130      return self
131    return FrameState(self.data_stack,
132                      self.block_stack,
133                      node,
134                      self.vm,
135                      self.exception,
136                      self.why)
137
138  def connect_to_cfg_node(self, node):
139    self.node.ConnectTo(node)
140    return self.change_cfg_node(node)
141
142  def forward_cfg_node(self, condition=None):
143    """Create a new CFG Node connected to the current cfg node.
144
145    Args:
146      condition: A cfg.Binding representing the condition that needs to be true
147        for this node to be reached.
148
149    Returns:
150      A new state which is the same as this state except for the node, which is
151      the new one.
152    """
153    new_node = self.node.ConnectNew(self.vm.frame and
154                                    self.vm.frame.current_opcode and
155                                    self.vm.frame.current_opcode.line,
156                                    condition)
157    return self.change_cfg_node(new_node)
158
159  def merge_into(self, other):
160    """Merge with another state."""
161    if other is None:
162      return self
163    assert len(self.data_stack) == len(other.data_stack)
164    assert len(self.block_stack) == len(other.block_stack)
165    node = other.node
166    if self.node is not node:
167      self.node.ConnectTo(node)
168    both = list(zip(self.data_stack, other.data_stack))
169    if any(v1 is not v2 for v1, v2 in both):
170      for v, o in both:
171        o.PasteVariable(v, None)
172    if self.node is not other.node:
173      self.node.ConnectTo(other.node)
174      return FrameState(other.data_stack,
175                        self.block_stack,
176                        other.node,
177                        self.vm,
178                        self.exception,
179                        self.why)
180    return self
181
182  def set_exception(self):
183    return FrameState(self.data_stack,
184                      self.block_stack,
185                      self.node.ConnectNew(self.vm.frame.current_opcode.line),
186                      self.vm,
187                      True,
188                      self.why)
189
190
191class SimpleFrame:
192  """A lightweight placeholder frame.
193
194  A frame used when we need a placeholder on the stack, e.g., to imitate a
195  function call in order to analyze a function, or to create a dummy stack for
196  error logging.
197  """
198
199  def __init__(self, opcode=None, node=None):
200    self.f_code = None  # for recursion detection
201    self.f_builtins = None
202    self.f_globals = None
203    self.current_opcode = opcode  # for memoization of unknowns
204    self.node = node
205    self.substs = ()
206    self.func = None
207
208
209class Frame(utils.VirtualMachineWeakrefMixin):
210  """An interpreter frame.
211
212  This contains the local value and block stacks and the associated code and
213  pointer. The most complex usage is with generators in which a frame is stored
214  and then repeatedly reactivated. Other than that frames are created executed
215  and then discarded.
216
217  Attributes:
218    f_code: The code object this frame is executing.
219    f_globals: The globals dict used for global name resolution.
220    f_locals: The locals used for name resolution. Will be modified by
221      Frame.__init__ if callargs is passed.
222    f_builtins: Similar for builtins.
223    f_back: The frame above self on the stack.
224    f_lineno: The first line number of the code object.
225    vm: The VirtualMachine instance we belong to.
226    node: The node at which the frame is created.
227    states: A mapping from opcodes to FrameState objects.
228    cells: local variables bound in a closure, or used in a closure.
229    block_stack: A stack of blocks used to manage exceptions, loops, and
230      "with"s.
231    data_stack: The value stack that is used for instruction operands.
232    allowed_returns: The return annotation of this function.
233    check_return: Whether the actual return type of a call should be checked
234      against allowed_returns.
235    return_variable: The return value of this function, as a Variable.
236    yield_variable: The yield value of this function, as a Variable.
237  """
238
239  def __init__(self, node, vm, f_code, f_globals, f_locals, f_back, callargs,
240               closure, func, first_arg: Optional[cfg.Variable],
241               substs: Collection[Dict[str, cfg.Variable]]):
242    """Initialize a special frame as needed by TypegraphVirtualMachine.
243
244    Args:
245      node: The current CFG graph node.
246      vm: The owning virtual machine.
247      f_code: The code object to execute in this frame.
248      f_globals: The global context to execute in as a SimpleValue as
249        used by TypegraphVirtualMachine.
250      f_locals: Local variables. Will be modified if callargs is passed.
251      f_back: The frame above this one on the stack.
252      callargs: Additional function arguments to store in f_locals.
253      closure: A tuple containing the new co_freevars.
254      func: An Optional[cfg.Binding] to the function this frame corresponds to.
255      first_arg: First argument to the function.
256      substs: Maps from type parameter names in scope for this frame to their
257        possible values.
258    Raises:
259      NameError: If we can't resolve any references into the outer frame.
260    """
261    super().__init__(vm)
262    assert isinstance(f_globals, abstract.LazyConcreteDict)
263    assert isinstance(f_locals, abstract.LazyConcreteDict)
264    self.node = node
265    self.current_opcode = None
266    self.f_code = f_code
267    self.states = {}
268    self.f_globals = f_globals
269    self.f_locals = f_locals
270    self.f_back = f_back
271    if f_back and f_back.f_builtins:
272      self.f_builtins = f_back.f_builtins
273    else:
274      _, bltin = self.vm.attribute_handler.get_attribute(
275          self.vm.root_node, f_globals, "__builtins__")
276      builtins_pu, = bltin.bindings
277      self.f_builtins = builtins_pu.data
278    self.f_lineno = f_code.co_firstlineno
279    # The first argument is used to make Python 3 super calls when super is not
280    # passed any arguments.
281    self.first_arg = first_arg
282    self.cells = {}
283
284    self.allowed_returns = None
285    self.check_return = False
286    self.return_variable = self.vm.program.NewVariable()
287    self.yield_variable = self.vm.program.NewVariable()
288
289    # Keep track of the current opcode block and and block targets we add while
290    # executing it; they can potentially be removed if the block returns early.
291    self.current_block = None
292    self.targets = collections.defaultdict(list)
293
294    # A map from function name to @typing.overload-decorated signatures. The
295    # overloads are copied to the implementation in InterpreterFunction.make.
296    self.overloads = collections.defaultdict(list)
297
298    # A closure g communicates with its outer function f through two
299    # fields in CodeType (both of which are tuples of strings):
300    # f.co_cellvars: All f-local variables that are used in g (or any other
301    #                closure).
302    # g.co_freevars: All variables from f that g uses.
303    # Also, note that f.co_cellvars will only also be in f.co_varnames
304    # if they are also parameters of f (because co_varnames[0:co_argcount] are
305    # always the parameters), but won't otherwise.
306    # Cells 0 .. num(cellvars)-1 : cellvar; num(cellvars) .. end : freevar
307    assert len(f_code.co_freevars) == len(closure or [])
308    self.cells = [self.vm.program.NewVariable()
309                  for _ in f_code.co_cellvars]
310    self.cells.extend(closure or [])
311
312    if callargs:
313      for name, value in sorted(callargs.items()):
314        if name in f_code.co_cellvars:
315          i = f_code.co_cellvars.index(name)
316          self.cells[i].PasteVariable(value, node)
317        else:
318          self.vm.attribute_handler.set_attribute(node, f_locals, name, value)
319    # Python 3 supports calling 'super' without any arguments. In such a case
320    # the implicit type argument is inserted into __build_class__'s cellvars
321    # and propagated as a closure variable to all method/functions calling
322    # 'super' without any arguments.
323    # If this is a frame for the function called by __build_class__ (see
324    # abstract.BuildClass), then we will store a reference to the variable
325    # corresponding to the cellvar named "__class__" separately for convenient
326    # access. After the class is built, abstract.BuildClass.call will add the
327    # binding for the new class into this variable.
328    self.class_closure_var = None
329    if func and isinstance(func.data, abstract.InterpreterFunction):
330      closure_name = abstract.BuildClass.CLOSURE_NAME
331      if func.data.is_class_builder and closure_name in f_code.co_cellvars:
332        i = f_code.co_cellvars.index(closure_name)
333        self.class_closure_var = self.cells[i]
334    self.func = func
335    self.substs = substs
336
337  def __repr__(self):     # pragma: no cover
338    return "<Frame at 0x%08x: %r @ %d>" % (
339        id(self), self.f_code.co_filename, self.f_lineno
340    )
341
342  @property
343  def type_params(self):
344    return set(itertools.chain.from_iterable(self.substs))
345
346
347class Condition:
348  """Represents a condition due to if-splitting.
349
350  Properties:
351    node: A CFGNode.
352    binding: A Binding for the condition's constraints.
353  """
354
355  def __init__(self, node, dnf):
356    # The condition is represented by a dummy variable with a single binding
357    # to None.  The origins for this binding are the dnf clauses.
358    self._var = node.program.NewVariable()
359    self._binding = self._var.AddBinding(None)
360    for clause in dnf:
361      sources = set(clause)
362      self._binding.AddOrigin(node, sources)
363
364  @property
365  def binding(self):
366    return self._binding
367
368
369_restrict_counter = metrics.MapCounter("state_restrict")
370
371
372def split_conditions(node, var):
373  """Return a pair of conditions for the value being true and false."""
374  return (_restrict_condition(node, var.bindings, True),
375          _restrict_condition(node, var.bindings, False))
376
377
378def _restrict_condition(node, bindings, logical_value):
379  """Return a restricted condition based on filtered bindings.
380
381  Args:
382    node: The CFGNode.
383    bindings: A sequence of bindings.
384    logical_value: Either True or False.
385
386  Returns:
387    A Condition or None.  Each binding is checked for compatibility with
388    logical_value.  If either no bindings match, or all bindings match, then
389    None is returned.  Otherwise a new Condition is built from the specified,
390    compatible, bindings.
391  """
392  dnf = []
393  restricted = False
394  for b in bindings:
395    match = compare.compatible_with(b.data, logical_value)
396    if match is True:  # pylint: disable=g-bool-id-comparison
397      dnf.append([b])
398    elif match is False:  # pylint: disable=g-bool-id-comparison
399      restricted = True
400    else:
401      dnf.extend(match)
402      # In theory, the value could have returned [[b]] as its DNF, in which
403      # case this isn't really a restriction.  However in practice this is
404      # very unlikely to occur, and treating it as a restriction will not
405      # cause any problems.
406      restricted = True
407  if not dnf:
408    _restrict_counter.inc("unsatisfiable")
409    return UNSATISFIABLE
410  elif restricted:
411    _restrict_counter.inc("restricted")
412    return Condition(node, dnf)
413  else:
414    _restrict_counter.inc("unrestricted")
415    return None
416
417
418def _is_or_is_not_cmp(left, right, is_not=False):
419  """Implementation of 'left is right' amd 'left is not right'."""
420  if (isinstance(left, mixin.PythonConstant) and
421      isinstance(right, mixin.PythonConstant)):
422    if left.cls != right.cls:
423      return is_not
424    return is_not ^ (left.pyval == right.pyval)
425  elif (isinstance(left, abstract.Instance) and
426        isinstance(right, abstract.Instance)):
427    if left.cls != right.cls:
428      # If those were the same they could be the same but we can't be sure from
429      # comparing types.
430      return is_not
431    return None
432  elif (isinstance(left, class_mixin.Class) and
433        isinstance(right, class_mixin.Class)):
434    # types are singletons. We use the name so that, e.g., two different
435    # TupleClass instances compare as identical.
436    return is_not ^ (left.full_name == right.full_name)
437  else:
438    return None
439
440
441def is_cmp(left, right):
442  return _is_or_is_not_cmp(left, right, is_not=False)
443
444
445def is_not_cmp(left, right):
446  return _is_or_is_not_cmp(left, right, is_not=True)
447