1"""Objects modelling VM state. (Frames etc.).""" 2 3import collections 4import itertools 5import logging 6from typing import Collection, Dict, Optional 7 8from pytype import abstract 9from pytype import class_mixin 10from pytype import compare 11from pytype import metrics 12from pytype import mixin 13from pytype import utils 14from pytype.typegraph import cfg 15 16log = logging.getLogger(__name__) 17 18# A special constant, returned by split_conditions() to signal that the 19# condition cannot be satisfied with any known bindings. 20UNSATISFIABLE = object() 21 22 23class FrameState(utils.VirtualMachineWeakrefMixin): 24 """Immutable state object, for attaching to opcodes.""" 25 26 __slots__ = ["block_stack", "data_stack", "node", "exception", "why"] 27 28 def __init__(self, data_stack, block_stack, node, vm, exception, why): 29 super().__init__(vm) 30 self.data_stack = data_stack 31 self.block_stack = block_stack 32 self.node = node 33 self.exception = exception 34 self.why = why 35 36 @classmethod 37 def init(cls, node, vm): 38 return FrameState((), (), node, vm, False, None) 39 40 def __setattribute__(self): 41 raise AttributeError("States are immutable.") 42 43 def set_why(self, why): 44 return FrameState(self.data_stack, 45 self.block_stack, 46 self.node, 47 self.vm, 48 self.exception, 49 why) 50 51 def push(self, *values): 52 """Push value(s) onto the value stack.""" 53 return FrameState(self.data_stack + tuple(values), 54 self.block_stack, 55 self.node, 56 self.vm, 57 self.exception, 58 self.why) 59 60 def peek(self, n): 61 """Get a value `n` entries down in the stack, without changing the stack.""" 62 return self.data_stack[-n] 63 64 def top(self): 65 return self.data_stack[-1] 66 67 def topn(self, n): 68 if n > 0: 69 return self.data_stack[-n:] 70 else: 71 return () 72 73 def pop(self): 74 """Pop a value from the value stack.""" 75 value = self.data_stack[-1] 76 return FrameState(self.data_stack[:-1], 77 self.block_stack, 78 self.node, 79 self.vm, 80 self.exception, 81 self.why), value 82 83 def pop_and_discard(self): 84 """Pop a value from the value stack and discard it.""" 85 return FrameState(self.data_stack[:-1], 86 self.block_stack, 87 self.node, 88 self.vm, 89 self.exception, 90 self.why) 91 92 def popn(self, n): 93 """Return n values, ordered oldest-to-newest.""" 94 if not n: 95 # Not an error: E.g. function calls with no parameters pop zero items 96 return self, () 97 if len(self.data_stack) < n: 98 raise IndexError("Trying to pop %d values from stack of size %d" % 99 (n, len(self.data_stack))) 100 values = self.data_stack[-n:] 101 return FrameState(self.data_stack[:-n], 102 self.block_stack, 103 self.node, 104 self.vm, 105 self.exception, 106 self.why), values 107 108 def push_block(self, block): 109 """Push a block on to the block stack.""" 110 return FrameState(self.data_stack, 111 self.block_stack + (block,), 112 self.node, 113 self.vm, 114 self.exception, 115 self.why) 116 117 def pop_block(self): 118 """Pop a block from the block stack.""" 119 block = self.block_stack[-1] 120 return FrameState(self.data_stack, 121 self.block_stack[:-1], 122 self.node, 123 self.vm, 124 self.exception, 125 self.why), block 126 127 def change_cfg_node(self, node): 128 assert isinstance(node, cfg.CFGNode) 129 if self.node is node: 130 return self 131 return FrameState(self.data_stack, 132 self.block_stack, 133 node, 134 self.vm, 135 self.exception, 136 self.why) 137 138 def connect_to_cfg_node(self, node): 139 self.node.ConnectTo(node) 140 return self.change_cfg_node(node) 141 142 def forward_cfg_node(self, condition=None): 143 """Create a new CFG Node connected to the current cfg node. 144 145 Args: 146 condition: A cfg.Binding representing the condition that needs to be true 147 for this node to be reached. 148 149 Returns: 150 A new state which is the same as this state except for the node, which is 151 the new one. 152 """ 153 new_node = self.node.ConnectNew(self.vm.frame and 154 self.vm.frame.current_opcode and 155 self.vm.frame.current_opcode.line, 156 condition) 157 return self.change_cfg_node(new_node) 158 159 def merge_into(self, other): 160 """Merge with another state.""" 161 if other is None: 162 return self 163 assert len(self.data_stack) == len(other.data_stack) 164 assert len(self.block_stack) == len(other.block_stack) 165 node = other.node 166 if self.node is not node: 167 self.node.ConnectTo(node) 168 both = list(zip(self.data_stack, other.data_stack)) 169 if any(v1 is not v2 for v1, v2 in both): 170 for v, o in both: 171 o.PasteVariable(v, None) 172 if self.node is not other.node: 173 self.node.ConnectTo(other.node) 174 return FrameState(other.data_stack, 175 self.block_stack, 176 other.node, 177 self.vm, 178 self.exception, 179 self.why) 180 return self 181 182 def set_exception(self): 183 return FrameState(self.data_stack, 184 self.block_stack, 185 self.node.ConnectNew(self.vm.frame.current_opcode.line), 186 self.vm, 187 True, 188 self.why) 189 190 191class SimpleFrame: 192 """A lightweight placeholder frame. 193 194 A frame used when we need a placeholder on the stack, e.g., to imitate a 195 function call in order to analyze a function, or to create a dummy stack for 196 error logging. 197 """ 198 199 def __init__(self, opcode=None, node=None): 200 self.f_code = None # for recursion detection 201 self.f_builtins = None 202 self.f_globals = None 203 self.current_opcode = opcode # for memoization of unknowns 204 self.node = node 205 self.substs = () 206 self.func = None 207 208 209class Frame(utils.VirtualMachineWeakrefMixin): 210 """An interpreter frame. 211 212 This contains the local value and block stacks and the associated code and 213 pointer. The most complex usage is with generators in which a frame is stored 214 and then repeatedly reactivated. Other than that frames are created executed 215 and then discarded. 216 217 Attributes: 218 f_code: The code object this frame is executing. 219 f_globals: The globals dict used for global name resolution. 220 f_locals: The locals used for name resolution. Will be modified by 221 Frame.__init__ if callargs is passed. 222 f_builtins: Similar for builtins. 223 f_back: The frame above self on the stack. 224 f_lineno: The first line number of the code object. 225 vm: The VirtualMachine instance we belong to. 226 node: The node at which the frame is created. 227 states: A mapping from opcodes to FrameState objects. 228 cells: local variables bound in a closure, or used in a closure. 229 block_stack: A stack of blocks used to manage exceptions, loops, and 230 "with"s. 231 data_stack: The value stack that is used for instruction operands. 232 allowed_returns: The return annotation of this function. 233 check_return: Whether the actual return type of a call should be checked 234 against allowed_returns. 235 return_variable: The return value of this function, as a Variable. 236 yield_variable: The yield value of this function, as a Variable. 237 """ 238 239 def __init__(self, node, vm, f_code, f_globals, f_locals, f_back, callargs, 240 closure, func, first_arg: Optional[cfg.Variable], 241 substs: Collection[Dict[str, cfg.Variable]]): 242 """Initialize a special frame as needed by TypegraphVirtualMachine. 243 244 Args: 245 node: The current CFG graph node. 246 vm: The owning virtual machine. 247 f_code: The code object to execute in this frame. 248 f_globals: The global context to execute in as a SimpleValue as 249 used by TypegraphVirtualMachine. 250 f_locals: Local variables. Will be modified if callargs is passed. 251 f_back: The frame above this one on the stack. 252 callargs: Additional function arguments to store in f_locals. 253 closure: A tuple containing the new co_freevars. 254 func: An Optional[cfg.Binding] to the function this frame corresponds to. 255 first_arg: First argument to the function. 256 substs: Maps from type parameter names in scope for this frame to their 257 possible values. 258 Raises: 259 NameError: If we can't resolve any references into the outer frame. 260 """ 261 super().__init__(vm) 262 assert isinstance(f_globals, abstract.LazyConcreteDict) 263 assert isinstance(f_locals, abstract.LazyConcreteDict) 264 self.node = node 265 self.current_opcode = None 266 self.f_code = f_code 267 self.states = {} 268 self.f_globals = f_globals 269 self.f_locals = f_locals 270 self.f_back = f_back 271 if f_back and f_back.f_builtins: 272 self.f_builtins = f_back.f_builtins 273 else: 274 _, bltin = self.vm.attribute_handler.get_attribute( 275 self.vm.root_node, f_globals, "__builtins__") 276 builtins_pu, = bltin.bindings 277 self.f_builtins = builtins_pu.data 278 self.f_lineno = f_code.co_firstlineno 279 # The first argument is used to make Python 3 super calls when super is not 280 # passed any arguments. 281 self.first_arg = first_arg 282 self.cells = {} 283 284 self.allowed_returns = None 285 self.check_return = False 286 self.return_variable = self.vm.program.NewVariable() 287 self.yield_variable = self.vm.program.NewVariable() 288 289 # Keep track of the current opcode block and and block targets we add while 290 # executing it; they can potentially be removed if the block returns early. 291 self.current_block = None 292 self.targets = collections.defaultdict(list) 293 294 # A map from function name to @typing.overload-decorated signatures. The 295 # overloads are copied to the implementation in InterpreterFunction.make. 296 self.overloads = collections.defaultdict(list) 297 298 # A closure g communicates with its outer function f through two 299 # fields in CodeType (both of which are tuples of strings): 300 # f.co_cellvars: All f-local variables that are used in g (or any other 301 # closure). 302 # g.co_freevars: All variables from f that g uses. 303 # Also, note that f.co_cellvars will only also be in f.co_varnames 304 # if they are also parameters of f (because co_varnames[0:co_argcount] are 305 # always the parameters), but won't otherwise. 306 # Cells 0 .. num(cellvars)-1 : cellvar; num(cellvars) .. end : freevar 307 assert len(f_code.co_freevars) == len(closure or []) 308 self.cells = [self.vm.program.NewVariable() 309 for _ in f_code.co_cellvars] 310 self.cells.extend(closure or []) 311 312 if callargs: 313 for name, value in sorted(callargs.items()): 314 if name in f_code.co_cellvars: 315 i = f_code.co_cellvars.index(name) 316 self.cells[i].PasteVariable(value, node) 317 else: 318 self.vm.attribute_handler.set_attribute(node, f_locals, name, value) 319 # Python 3 supports calling 'super' without any arguments. In such a case 320 # the implicit type argument is inserted into __build_class__'s cellvars 321 # and propagated as a closure variable to all method/functions calling 322 # 'super' without any arguments. 323 # If this is a frame for the function called by __build_class__ (see 324 # abstract.BuildClass), then we will store a reference to the variable 325 # corresponding to the cellvar named "__class__" separately for convenient 326 # access. After the class is built, abstract.BuildClass.call will add the 327 # binding for the new class into this variable. 328 self.class_closure_var = None 329 if func and isinstance(func.data, abstract.InterpreterFunction): 330 closure_name = abstract.BuildClass.CLOSURE_NAME 331 if func.data.is_class_builder and closure_name in f_code.co_cellvars: 332 i = f_code.co_cellvars.index(closure_name) 333 self.class_closure_var = self.cells[i] 334 self.func = func 335 self.substs = substs 336 337 def __repr__(self): # pragma: no cover 338 return "<Frame at 0x%08x: %r @ %d>" % ( 339 id(self), self.f_code.co_filename, self.f_lineno 340 ) 341 342 @property 343 def type_params(self): 344 return set(itertools.chain.from_iterable(self.substs)) 345 346 347class Condition: 348 """Represents a condition due to if-splitting. 349 350 Properties: 351 node: A CFGNode. 352 binding: A Binding for the condition's constraints. 353 """ 354 355 def __init__(self, node, dnf): 356 # The condition is represented by a dummy variable with a single binding 357 # to None. The origins for this binding are the dnf clauses. 358 self._var = node.program.NewVariable() 359 self._binding = self._var.AddBinding(None) 360 for clause in dnf: 361 sources = set(clause) 362 self._binding.AddOrigin(node, sources) 363 364 @property 365 def binding(self): 366 return self._binding 367 368 369_restrict_counter = metrics.MapCounter("state_restrict") 370 371 372def split_conditions(node, var): 373 """Return a pair of conditions for the value being true and false.""" 374 return (_restrict_condition(node, var.bindings, True), 375 _restrict_condition(node, var.bindings, False)) 376 377 378def _restrict_condition(node, bindings, logical_value): 379 """Return a restricted condition based on filtered bindings. 380 381 Args: 382 node: The CFGNode. 383 bindings: A sequence of bindings. 384 logical_value: Either True or False. 385 386 Returns: 387 A Condition or None. Each binding is checked for compatibility with 388 logical_value. If either no bindings match, or all bindings match, then 389 None is returned. Otherwise a new Condition is built from the specified, 390 compatible, bindings. 391 """ 392 dnf = [] 393 restricted = False 394 for b in bindings: 395 match = compare.compatible_with(b.data, logical_value) 396 if match is True: # pylint: disable=g-bool-id-comparison 397 dnf.append([b]) 398 elif match is False: # pylint: disable=g-bool-id-comparison 399 restricted = True 400 else: 401 dnf.extend(match) 402 # In theory, the value could have returned [[b]] as its DNF, in which 403 # case this isn't really a restriction. However in practice this is 404 # very unlikely to occur, and treating it as a restriction will not 405 # cause any problems. 406 restricted = True 407 if not dnf: 408 _restrict_counter.inc("unsatisfiable") 409 return UNSATISFIABLE 410 elif restricted: 411 _restrict_counter.inc("restricted") 412 return Condition(node, dnf) 413 else: 414 _restrict_counter.inc("unrestricted") 415 return None 416 417 418def _is_or_is_not_cmp(left, right, is_not=False): 419 """Implementation of 'left is right' amd 'left is not right'.""" 420 if (isinstance(left, mixin.PythonConstant) and 421 isinstance(right, mixin.PythonConstant)): 422 if left.cls != right.cls: 423 return is_not 424 return is_not ^ (left.pyval == right.pyval) 425 elif (isinstance(left, abstract.Instance) and 426 isinstance(right, abstract.Instance)): 427 if left.cls != right.cls: 428 # If those were the same they could be the same but we can't be sure from 429 # comparing types. 430 return is_not 431 return None 432 elif (isinstance(left, class_mixin.Class) and 433 isinstance(right, class_mixin.Class)): 434 # types are singletons. We use the name so that, e.g., two different 435 # TupleClass instances compare as identical. 436 return is_not ^ (left.full_name == right.full_name) 437 else: 438 return None 439 440 441def is_cmp(left, right): 442 return _is_or_is_not_cmp(left, right, is_not=False) 443 444 445def is_not_cmp(left, right): 446 return _is_or_is_not_cmp(left, right, is_not=True) 447