"""
Decompiler that can be used with the debugger (where statements correctly represent the
line numbers).

Note: this is a work in progress / proof of concept / not ready to be used.
"""

import dis

from _pydevd_bundle.pydevd_collect_bytecode_info import _iter_instructions
from _pydevd_bundle.pydevd_constants import dict_iter_items, IS_PY2
from _pydev_bundle import pydev_log
import sys
import inspect

try:
    xrange = xrange
except NameError:
    # Python 3: there is no `xrange` builtin.
    xrange = range


class _Stack(object):
    '''
    Minimal LIFO stack holding the handlers which represent values on the interpreter stack.
    '''

    def __init__(self):
        self._contents = []

    def push(self, obj):
        self._contents.append(obj)

    def pop(self):
        return self._contents.pop()


# Markers stored in the per-line contents of the writer (alongside tokens) to change indentation.
INDENT_MARKER = object()
DEDENT_MARKER = object()
_SENTINEL = object()

# Text added for each INDENT_MARKER and removed for each DEDENT_MARKER when rendering.
_INDENT_UNIT = '    '

DEBUG = False


class _Token(object):
    '''
    A piece of text to be written in a given line along with the tokens which must be
    written before it.
    '''

    def __init__(self, i_line, instruction=None, tok=_SENTINEL, priority=0, after=None, end_of_line=False):
        '''
        :param i_line:
            The line where the token should be written.
        :param instruction:
            The instruction which originated the token. If `tok` is not given, the text is
            `str(instruction.argval)` (or an empty string if the argval is a code object).
        :param tok:
            The text of the token (if given, takes precedence over the instruction).
        :param priority:
            Stored as-is in the token.
        :param after:
            A token or handler which must be written before this token (see `mark_after`).
        :param end_of_line:
            Marker to signal only after all the other tokens have been written.
        :raises AssertionError:
            If neither `tok` nor `instruction` are given.
        '''
        self.i_line = i_line
        if tok is not _SENTINEL:
            self.tok = tok
        else:
            if instruction is not None:
                if inspect.iscode(instruction.argval):
                    self.tok = ''
                else:
                    self.tok = str(instruction.argval)
            else:
                raise AssertionError('Either the tok or the instruction is needed.')
        self.instruction = instruction
        self.priority = priority
        self.end_of_line = end_of_line
        self._after_tokens = set()
        self._after_handler_tokens = set()
        if after:
            self.mark_after(after)

    def mark_after(self, v):
        '''
        Registers that this token must be written after `v`.

        :param v:
            Either a `_Token` or a `_BaseHandler` (in which case this token must come after
            all the tokens of the handler).
        :raises AssertionError:
            If `v` is neither a token nor a handler.
        '''
        if isinstance(v, _Token):
            self._after_tokens.add(v)
        elif isinstance(v, _BaseHandler):
            self._after_handler_tokens.add(v)

        else:
            raise AssertionError('Unhandled: %s' % (v,))

    def get_after_tokens(self):
        '''
        :return set(_Token):
            A new set with the tokens which must precede this one. The tokens of the handlers
            registered are collected at call time (so, tokens added to a handler after
            `mark_after` was called are also considered).
        '''
        ret = self._after_tokens.copy()
        for handler in self._after_handler_tokens:
            ret.update(handler.tokens)
        return ret

    def __repr__(self):
        return 'Token(%s, after: %s)' % (self.tok, self.get_after_tokens())

    __str__ = __repr__


class _Writer(object):
    '''
    Collects, for each line, the tokens and indent/dedent markers to be written in that line.
    '''

    def __init__(self):
        self.line_to_contents = {}
        self.all_tokens = set()

    def get_line(self, line):
        '''
        :return list:
            The (mutable) list with the contents of the given line (created if not there yet).
        '''
        lst = self.line_to_contents.get(line)
        if lst is None:
            lst = self.line_to_contents[line] = []
        return lst

    def indent(self, line):
        self.get_line(line).append(INDENT_MARKER)

    def dedent(self, line):
        self.get_line(line).append(DEDENT_MARKER)

    def write(self, line, token):
        '''
        Adds the token to the given line (a token which was already written is ignored).
        '''
        # Validate before registering so that an invalid object is never added to `all_tokens`.
        assert isinstance(token, _Token)
        if token in self.all_tokens:
            return
        self.all_tokens.add(token)
        lst = self.get_line(line)
        lst.append(token)


class _BaseHandler(object):
    '''
    Base class for the handlers of a given opcode. The instruction is handled when the
    handler is created (`_handle` is called from `__init__`).
    '''

    def __init__(self, i_line, instruction, stack, writer, disassembler):
        '''
        :param i_line:
            The line being processed when the instruction was found.
        :param instruction:
            The instruction to be handled.
        :param _Stack stack:
            The stack shared by the handlers of the same code object.
        :param _Writer writer:
            Where the tokens should be written.
        :param _PyCodeToSource disassembler:
            The disassembler which created the handler.
        '''
        self.i_line = i_line
        self.instruction = instruction
        self.stack = stack
        self.writer = writer
        self.disassembler = disassembler
        self.tokens = []
        self._handle()

    def _write_tokens(self):
        for token in self.tokens:
            self.writer.write(token.i_line, token)

    def _handle(self):
        raise NotImplementedError(self)

    def __repr__(self, *args, **kwargs):
        try:
            return "%s line:%s" % (self.instruction, self.i_line)
        except Exception:
            # May be called before the attributes are set.
            return object.__repr__(self)

    __str__ = __repr__


_op_name_to_handler = {}


def _register(cls):
    '''
    Class decorator which registers the handler class for the opcode named in `cls.opname`.
    '''
    _op_name_to_handler[cls.opname] = cls
    return cls


class _BasePushHandler(_BaseHandler):
    '''
    Handler which just pushes itself to the stack.
    '''

    def _handle(self):
        self.stack.push(self)


class _BaseLoadHandler(_BasePushHandler):
    '''
    Handler which pushes itself to the stack and has a single token created from its instruction.
    '''

    def _handle(self):
        _BasePushHandler._handle(self)
        self.tokens = [_Token(self.i_line, self.instruction)]


@_register
class _LoadBuildClass(_BasePushHandler):
    opname = "LOAD_BUILD_CLASS"


@_register
class _LoadConst(_BaseLoadHandler):
    opname = "LOAD_CONST"


@_register
class _LoadName(_BaseLoadHandler):
    opname = "LOAD_NAME"


@_register
class _LoadGlobal(_BaseLoadHandler):
    opname = "LOAD_GLOBAL"


@_register
class _LoadFast(_BaseLoadHandler):
    opname = "LOAD_FAST"


@_register
class _GetIter(_BaseHandler):
    '''
    Implements TOS = iter(TOS).
    '''
    opname = "GET_ITER"
    iter_target = None

    def _handle(self):
        self.iter_target = self.stack.pop()
        self.tokens.extend(self.iter_target.tokens)
        self.stack.push(self)


@_register
class _ForIter(_BaseHandler):
    '''
    TOS is an iterator. Call its __next__() method. If this yields a new value, push it on the stack
    (leaving the iterator below it). If the iterator indicates it is exhausted TOS is popped, and
    the byte code counter is incremented by delta.
    '''
    opname = "FOR_ITER"

    iter_in = None

    def _handle(self):
        self.iter_in = self.stack.pop()
        self.stack.push(self)

    def store_in_name(self, store_name):
        '''
        Writes `for <name> in <iterable>:` (called by the store handler which consumes the
        value pushed by this handler).

        :param _StoreName store_name:
            The handler which stores the value of the iteration (provides the name token).
        '''
        for_token = _Token(self.i_line, None, 'for ')
        self.tokens.append(for_token)
        prev = for_token

        t_name = _Token(store_name.i_line, store_name.instruction, after=prev)
        self.tokens.append(t_name)
        prev = t_name

        in_token = _Token(store_name.i_line, None, ' in ', after=prev)
        self.tokens.append(in_token)
        prev = in_token

        if self.iter_in:
            # Chain the tokens of the iterable so that they're written in order after ' in '.
            for t in self.iter_in.tokens:
                t.mark_after(prev)
                prev = t
            self.tokens.extend(self.iter_in.tokens)

        colon_token = _Token(self.i_line, None, ':', after=prev)
        self.tokens.append(colon_token)

        self._write_tokens()


@_register
class _StoreName(_BaseHandler):
    '''
    Implements name = TOS. namei is the index of name in the attribute co_names of the code object.
    The compiler tries to use STORE_FAST or STORE_GLOBAL if possible.
    '''

    opname = "STORE_NAME"

    def _handle(self):
        v = self.stack.pop()

        if isinstance(v, _ForIter):
            v.store_in_name(self)
        else:
            # A (non-lambda) function already wrote its own `def name(...):`, so, only
            # write `name=` in the other cases.
            if not isinstance(v, _MakeFunction) or v.is_lambda:
                # The assignment starts in the first line of the value being assigned.
                line = self.i_line
                for t in v.tokens:
                    line = min(line, t.i_line)

                t_name = _Token(line, self.instruction)
                t_equal = _Token(line, None, '=', after=t_name)

                self.tokens.append(t_name)
                self.tokens.append(t_equal)

                for t in v.tokens:
                    t.mark_after(t_equal)
            self.tokens.extend(v.tokens)

            self._write_tokens()


@_register
class _ReturnValue(_BaseHandler):
    """
    Returns with TOS to the caller of the function.
    """

    opname = "RETURN_VALUE"

    def _handle(self):
        v = self.stack.pop()
        # `end_of_line=True`: the return is only considered after the other tokens of the
        # line which don't depend on it were written (see `_compose_line_contents`).
        return_token = _Token(self.i_line, None, 'return ', end_of_line=True)
        self.tokens.append(return_token)
        for token in v.tokens:
            token.mark_after(return_token)
        self.tokens.extend(v.tokens)

        self._write_tokens()


@_register
class _CallFunction(_BaseHandler):
    """

    CALL_FUNCTION(argc)

    Calls a callable object with positional arguments. argc indicates the number of positional
    arguments. The top of the stack contains positional arguments, with the right-most argument
    on top. Below the arguments is a callable object to call. CALL_FUNCTION pops all arguments
    and the callable object off the stack, calls the callable object with those arguments, and
    pushes the return value returned by the callable object.

    Changed in version 3.6: This opcode is used only for calls with positional arguments.

    """

    opname = "CALL_FUNCTION"

    def _handle(self):
        # Pop the arguments (right-most first) and then the callable.
        args = []
        for _i in xrange(self.instruction.argval + 1):
            arg = self.stack.pop()
            args.append(arg)
        it = reversed(args)
        name = next(it)
        max_line = name.i_line
        for t in name.tokens:
            self.tokens.append(t)

        tok_open_parens = _Token(name.i_line, None, '(', after=name)
        self.tokens.append(tok_open_parens)

        prev = tok_open_parens
        for i, arg in enumerate(it):
            if i > 0:
                # The separator goes *between* arguments (i.e.: after the previous
                # argument and before the tokens of the current one).
                comma_token = _Token(prev.i_line, None, ',', after=prev)
                self.tokens.append(comma_token)
                prev = comma_token

            for t in arg.tokens:
                t.mark_after(name)
                t.mark_after(prev)
                max_line = max(max_line, t.i_line)
                self.tokens.append(t)
            prev = arg

        # The call is closed in the last line where one of its arguments appears.
        tok_close_parens = _Token(max_line, None, ')', after=prev)
        self.tokens.append(tok_close_parens)

        self._write_tokens()

        self.stack.push(self)


if IS_PY2:

    @_register
    class _MakeFunctionPy2(_BaseHandler):
        """
        Pushes a new function object on the stack. TOS is the code associated with the function. The
        function object is defined to have argc default parameters, which are found below TOS.
        """

        opname = "MAKE_FUNCTION"

        # Read by `_StoreName` for any `_MakeFunction` instance.
        is_lambda = False

        def _handle(self):
            stack = self.stack
            self.code = stack.pop()

            stack.push(self)

    _MakeFunction = _MakeFunctionPy2

else:

    @_register
    class _MakeFunctionPy3(_BaseHandler):
        """
        Pushes a new function object on the stack. From bottom to top, the consumed stack must consist
        of values if the argument carries a specified flag value

            0x01 a tuple of default values for positional-only and positional-or-keyword parameters in positional order

            0x02 a dictionary of keyword-only parameters' default values

            0x04 an annotation dictionary

            0x08 a tuple containing cells for free variables, making a closure

            the code associated with the function (at TOS1)

            the qualified name of the function (at TOS)
        """

        opname = "MAKE_FUNCTION"
        is_lambda = False

        def _handle(self):
            stack = self.stack
            self.qualified_name = stack.pop()
            self.code = stack.pop()

            # NOTE(review): only the 0x01 flag is consumed from the stack here (the values
            # related to 0x02, 0x04 and 0x08 are not popped).
            default_node = None
            if self.instruction.argval & 0x01:
                default_node = stack.pop()

            is_lambda = self.is_lambda = '<lambda>' in [x.tok for x in self.qualified_name.tokens]

            prev = None
            if not is_lambda:
                def_token = _Token(self.i_line, None, 'def ')
                self.tokens.append(def_token)
                prev = def_token

            for token in self.qualified_name.tokens:
                self.tokens.append(token)
                if not is_lambda:
                    token.mark_after(def_token)
                prev = token

            open_parens_token = _Token(self.i_line, None, '(', after=prev)
            self.tokens.append(open_parens_token)
            prev = open_parens_token

            code = self.code.instruction.argval

            # `co_varnames` starts with the argument names but also has the locals of the
            # body, so, restrict it to the positional arguments (keyword-only / varargs
            # are not rendered).
            arg_names = code.co_varnames[:code.co_argcount]

            # The defaults apply to the last positional arguments: pad the start with
            # sentinels so that `defaults[i]` matches `arg_names[i]`.
            if default_node:
                default_values = list(default_node.instruction.argval)
                defaults = ([_SENTINEL] * (len(arg_names) - len(default_values))) + default_values
            else:
                defaults = [_SENTINEL] * len(arg_names)

            for i, arg in enumerate(arg_names):
                if i > 0:
                    comma_token = _Token(prev.i_line, None, ', ', after=prev)
                    self.tokens.append(comma_token)
                    prev = comma_token

                arg_token = _Token(self.i_line, None, arg, after=prev)
                self.tokens.append(arg_token)
                # Anything which follows (default, comma or close parens) must come after the name.
                prev = arg_token

                default = defaults[i]
                if default is not _SENTINEL:
                    eq_token = _Token(default_node.i_line, None, '=', after=prev)
                    self.tokens.append(eq_token)
                    prev = eq_token

                    default_token = _Token(default_node.i_line, None, str(default), after=prev)
                    self.tokens.append(default_token)
                    prev = default_token

            tok_close_parens = _Token(prev.i_line, None, '):', after=prev)
            self.tokens.append(tok_close_parens)

            self._write_tokens()

            stack.push(self)
            # The body (merged from the function code object) is indented starting at the
            # line after the signature and dedented at its last line.
            self.writer.indent(prev.i_line + 1)
            self.writer.dedent(max(self.disassembler.merge_code(code)))

    _MakeFunction = _MakeFunctionPy3


def _print_after_info(line_contents, stream=None):
    '''
    Writes, for each token, the tokens it must be written after (debugging helper).

    :param list(_Token) line_contents:
        The tokens to be described.
    :param stream:
        Object with a `write` method (defaults to `sys.stdout`).
    '''
    if stream is None:
        stream = sys.stdout
    for token in line_contents:
        after_tokens = token.get_after_tokens()
        if after_tokens:
            s = '%s after: %s\n' % (
                repr(token.tok),
                ('"' + '", "'.join(t.tok for t in after_tokens) + '"'))
            stream.write(s)
        else:
            # One entry per line (same as the branch above).
            stream.write('%s (NO REQUISITES)\n' % repr(token.tok))


def _compose_line_contents(line_contents, previous_line_tokens):
    '''
    Creates the text of a line, ordering its tokens so that each token is written only after
    the tokens it depends on.

    :param list(_Token) line_contents:
        The tokens of the line. Note: this list is changed in-place (tokens are removed as
        they're written).
    :param set(_Token) previous_line_tokens:
        The tokens already written (in this or in previous lines). Note: updated in-place
        with the tokens written by this call.
    :return str:
        The concatenated text of the tokens.
    '''
    lst = []
    handled = set()

    # Tokens marked as `end_of_line` are held back until no other token can make progress.
    add_to_end_of_line = [token for token in line_contents if token.end_of_line]
    line_contents[:] = [token for token in line_contents if not token.end_of_line]

    while line_contents:
        added = False
        remaining = []

        for token in line_contents:
            for after in token.get_after_tokens():
                if after not in handled and after not in previous_line_tokens:
                    # Some requisite is still unwritten: retry in the next pass.
                    remaining.append(token)
                    break
            else:
                added = True
                previous_line_tokens.add(token)
                handled.add(token)
                lst.append(token.tok)

        line_contents[:] = remaining

        if not added:
            if add_to_end_of_line:
                line_contents.extend(add_to_end_of_line)
                del add_to_end_of_line[:]
                continue

            # Something is off, let's just add as is.
            for token in line_contents:
                if token not in handled:
                    lst.append(token.tok)

            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
            stream = StringIO()
            _print_after_info(line_contents, stream)
            pydev_log.critical('Error. After markers are not correct:\n%s', stream.getvalue())
            break
    return ''.join(lst)


class _PyCodeToSource(object):
    '''
    Processes the instructions of a code object to build the source code which represents it.
    '''

    def __init__(self, co, memo=None):
        '''
        :param co:
            The code object to be processed.
        :param dict memo:
            Dict passed along to the instances created to process nested code objects.
        '''
        if memo is None:
            memo = {}
        self.memo = memo
        self.co = co
        self.instructions = list(_iter_instructions(co))
        self.stack = _Stack()
        self.writer = _Writer()

    def _process_next(self, i_line):
        '''
        Consumes the next instruction, creating the handler registered for its opcode (if
        there's no handler registered the instruction is just discarded).
        '''
        instruction = self.instructions.pop(0)
        handler_class = _op_name_to_handler.get(instruction.opname)
        if handler_class is not None:
            s = handler_class(i_line, instruction, self.stack, self.writer, self)
            if DEBUG:
                print(s)

        else:
            if DEBUG:
                print("UNHANDLED", instruction)

    def build_line_to_contents(self):
        '''
        Processes all the (remaining) instructions.

        :return dict(int, list):
            The line -> contents (tokens and indent/dedent markers) held by the writer.
        '''
        co = self.co

        op_offset_to_line = dict(dis.findlinestarts(co))
        curr_line_index = 0

        instructions = self.instructions
        while instructions:
            instruction = instructions[0]
            # Instructions which don't start a line belong to the last line found.
            new_line_index = op_offset_to_line.get(instruction.offset)
            if new_line_index is not None:
                curr_line_index = new_line_index

            self._process_next(curr_line_index)
        return self.writer.line_to_contents

    def merge_code(self, code):
        '''
        Processes a nested code object, adding its line contents to the writer of this instance.

        :param code:
            The nested code object.
        :return list(int):
            The (sorted) lines which had contents in the nested code object.
        '''
        if DEBUG:
            print('merge code ----')
        line_to_contents = _PyCodeToSource(code, self.memo).build_line_to_contents()
        lines = []
        for line, contents in sorted(dict_iter_items(line_to_contents)):
            lines.append(line)
            self.writer.get_line(line).extend(contents)
        if DEBUG:
            print('end merge code ----')
        return lines

    def disassemble(self):
        '''
        :return unicode:
            The source code built, where the contents of each line are written in the
            related line number (lines without contents are written as empty lines).
        '''
        show_lines = False
        line_to_contents = self.build_line_to_contents()
        from io import StringIO

        stream = StringIO()
        last_line = 0
        indent = ''
        previous_line_tokens = set()
        for i_line, contents in sorted(dict_iter_items(line_to_contents)):
            # Fill the gap up to the current line so that line numbers match.
            while last_line < i_line - 1:
                if show_lines:
                    stream.write(u"%s.\n" % (last_line + 1,))
                else:
                    stream.write(u"\n")
                last_line += 1

            line_contents = []
            dedents_found = 0
            for part in contents:
                if part is INDENT_MARKER:
                    if DEBUG:
                        print('found indent', i_line)
                    indent += _INDENT_UNIT
                    continue
                if part is DEDENT_MARKER:
                    if DEBUG:
                        print('found dedent', i_line)
                    dedents_found += 1
                    continue
                line_contents.append(part)

            s = indent + _compose_line_contents(line_contents, previous_line_tokens)
            if show_lines:
                stream.write(u"%s. %s\n" % (i_line, s))
            else:
                stream.write(u"%s\n" % s)

            # Dedents only take effect after the line where they were found is written.
            if dedents_found:
                indent = indent[:-(len(_INDENT_UNIT) * dedents_found)]
            last_line = i_line

        return stream.getvalue()


def code_obj_to_source(co):
    """
    Converts a code object to source code to provide a suitable representation for the compiler when
    the actual source code is not found.

    This is a work in progress / proof of concept / not ready to be used.

    :param co:
        The code object to be converted.
    :return:
        The source code built for the code object.
    """
    ret = _PyCodeToSource(co).disassemble()
    if DEBUG:
        print(ret)
    return ret