1"""
2Decompiler that can be used with the debugger (where statements correctly represent the
3line numbers).
4
5Note: this is a work in progress / proof of concept / not ready to be used.
6"""
7
8import dis
9
10from _pydevd_bundle.pydevd_collect_bytecode_info import _iter_instructions
11from _pydevd_bundle.pydevd_constants import dict_iter_items, IS_PY2
12from _pydev_bundle import pydev_log
13import sys
14import inspect
15
# Python 2 provides `xrange`; on Python 3 the name doesn't exist (the lookup raises
# NameError) and `range` is used under the same name.
try:
    xrange = xrange
except NameError:
    xrange = range
20
21
class _Stack(object):
    """
    Minimal LIFO container (used to keep the handlers which are still pending while the
    bytecode is processed).
    """

    def __init__(self):
        self._contents = []

    def push(self, obj):
        """Puts `obj` at the top of the stack."""
        self._contents.append(obj)

    def pop(self):
        """Removes and returns the item at the top of the stack (IndexError if it's empty)."""
        return self._contents.pop()
33
34
# Unique objects appended to the contents of a line by _Writer.indent() / _Writer.dedent().
# They're recognized by identity (not written as text) when the final source is generated.
INDENT_MARKER = object()
DEDENT_MARKER = object()
# Placeholder for "no value given" (used as the default for the `tok` of a _Token and to
# mark function arguments which have no default value).
_SENTINEL = object()

# When True, information on the handlers/instructions being processed is printed.
DEBUG = False
40
41
class _Token(object):
    """
    A piece of text to be written in a given line, along with the tokens (or handlers) which
    must be written before it.
    """

    def __init__(self, i_line, instruction=None, tok=_SENTINEL, priority=0, after=None, end_of_line=False):
        '''
        :param i_line:
            The line in which the token should be written.
        :param instruction:
            The instruction related to the token. If `tok` is not given the text is computed
            from its `argval` (empty for code objects, `str(argval)` otherwise).
        :param tok:
            The text of the token (if not given, `instruction` is required).
        :param priority:
            Just stored in the token.
        :param after:
            A token or handler which must precede this token (see `mark_after`).
        :param end_of_line:
            Marker to signal only after all the other tokens have been written.
        '''
        self.i_line = i_line

        if tok is _SENTINEL:
            # No explicit text: it must be computed from the instruction.
            if instruction is None:
                raise AssertionError('Either the tok or the instruction is needed.')
            argval = instruction.argval
            tok = '' if inspect.iscode(argval) else str(argval)
        self.tok = tok

        self.instruction = instruction
        self.priority = priority
        self.end_of_line = end_of_line
        self._after_tokens = set()
        self._after_handler_tokens = set()
        if after:
            self.mark_after(after)

    def mark_after(self, v):
        '''
        Registers that this token must be written after `v` (a token, or a handler, in which
        case it must be written after all the tokens of the handler).
        '''
        if isinstance(v, _Token):
            target = self._after_tokens
        elif isinstance(v, _BaseHandler):
            target = self._after_handler_tokens
        else:
            raise AssertionError('Unhandled: %s' % (v,))
        target.add(v)

    def get_after_tokens(self):
        '''
        :return set:
            A new set with the tokens which must precede this one (handlers are expanded to
            the tokens they have at the time of the call).
        '''
        required = set(self._after_tokens)
        for handler in self._after_handler_tokens:
            required.update(handler.tokens)
        return required

    def __repr__(self):
        return 'Token(%s, after: %s)' % (self.tok, self.get_after_tokens())

    __str__ = __repr__
92
93
class _Writer(object):
    """
    Collects, for each line number, the tokens and indent/dedent markers to be written in it.
    """

    def __init__(self):
        self.line_to_contents = {}
        self.all_tokens = set()

    def get_line(self, line):
        """Returns the list with the contents of `line` (created on the first access)."""
        return self.line_to_contents.setdefault(line, [])

    def indent(self, line):
        """Registers an indent marker in the given line."""
        self.get_line(line).append(INDENT_MARKER)

    def dedent(self, line):
        """Registers a dedent marker in the given line."""
        self.get_line(line).append(DEDENT_MARKER)

    def write(self, line, token):
        """Adds `token` to `line` (a token which was already written is ignored)."""
        seen = self.all_tokens
        if token not in seen:
            seen.add(token)
            assert isinstance(token, _Token)
            self.get_line(line).append(token)
119
120
class _BaseHandler(object):
    """
    Base class for the objects which deal with a single bytecode instruction.

    The instruction is handled during construction (`_handle` is called from `__init__`),
    so, subclasses must override `_handle`.
    """

    def __init__(self, i_line, instruction, stack, writer, disassembler):
        """
        :param i_line:
            The line being processed when the instruction was found.
        :param instruction:
            The instruction to be handled.
        :param stack:
            The stack shared among the handlers (handlers push/pop other handlers).
        :param writer:
            Where the tokens are written (see `_write_tokens`).
        :param disassembler:
            The object which is processing the instructions and created this handler.
        """
        self.i_line = i_line
        self.instruction = instruction
        self.stack = stack
        self.writer = writer
        self.disassembler = disassembler
        self.tokens = []
        self._handle()

    def _write_tokens(self):
        """Writes each token of this handler to the writer at the line of the token."""
        for token in self.tokens:
            self.writer.write(token.i_line, token)

    def _handle(self):
        """Processes the instruction (must be overridden)."""
        raise NotImplementedError(self)

    def __repr__(self, *args, **kwargs):
        try:
            return "%s line:%s" % (self.instruction, self.i_line)
        except Exception:
            # I.e.: not fully initialized. Only regular errors are handled so that
            # KeyboardInterrupt/SystemExit are still propagated.
            return object.__repr__(self)

    __str__ = __repr__
146
147
# Maps the name of an opcode to the handler class which deals with it.
_op_name_to_handler = {}


def _register(cls):
    """
    Class decorator which makes `cls` the handler for the opcode named `cls.opname`.

    :return:
        The same class received.
    """
    opname = cls.opname
    _op_name_to_handler[opname] = cls
    return cls
154
155
class _BasePushHandler(_BaseHandler):
    """
    Base for handlers whose only effect is pushing themselves on the stack.
    """

    def _handle(self):
        # The handler itself is what other handlers get when they pop from the stack.
        self.stack.push(self)
160
161
class _BaseLoadHandler(_BasePushHandler):
    """
    Base for handlers which push themselves on the stack and provide a single token created
    from their instruction.
    """

    def _handle(self):
        _BasePushHandler._handle(self)
        # No explicit text is given: the token computes it from the instruction.
        self.tokens = [_Token(self.i_line, self.instruction)]
167
168
@_register
class _LoadBuildClass(_BasePushHandler):
    """
    Handler for LOAD_BUILD_CLASS: it's pushed on the stack but provides no tokens.
    """
    opname = "LOAD_BUILD_CLASS"
172
173
@_register
class _LoadConst(_BaseLoadHandler):
    """
    Handler for LOAD_CONST: pushed on the stack with a token created from the instruction.
    """
    opname = "LOAD_CONST"
177
178
@_register
class _LoadName(_BaseLoadHandler):
    """
    Handler for LOAD_NAME: pushed on the stack with a token created from the instruction.
    """
    opname = "LOAD_NAME"
182
183
@_register
class _LoadGlobal(_BaseLoadHandler):
    """
    Handler for LOAD_GLOBAL: pushed on the stack with a token created from the instruction.
    """
    opname = "LOAD_GLOBAL"
187
188
@_register
class _LoadFast(_BaseLoadHandler):
    """
    Handler for LOAD_FAST: pushed on the stack with a token created from the instruction.
    """
    opname = "LOAD_FAST"
192
193
@_register
class _GetIter(_BaseHandler):
    '''
    Implements TOS = iter(TOS).

    The handler takes the place of the iterated target on the stack (providing the same
    tokens the target had).
    '''
    opname = "GET_ITER"
    iter_target = None

    def _handle(self):
        target = self.stack.pop()
        self.iter_target = target
        self.tokens.extend(target.tokens)
        self.stack.push(self)
206
207
@_register
class _ForIter(_BaseHandler):
    '''
    TOS is an iterator. Call its __next__() method. If this yields a new value, push it on the stack
    (leaving the iterator below it). If the iterator indicates it is exhausted TOS is popped, and
    the byte code counter is incremented by delta.
    '''
    opname = "FOR_ITER"

    iter_in = None

    def _handle(self):
        iterator = self.stack.pop()
        self.iter_in = iterator
        self.stack.push(self)

    def store_in_name(self, store_name):
        '''
        Writes the tokens for `for <name> in <iterable>:`.

        :param store_name:
            The handler storing the loop variable (provides the instruction with the name
            and the line in which the name and ` in ` are written).
        '''
        tokens = self.tokens
        name_line = store_name.i_line

        for_token = _Token(self.i_line, None, 'for ')
        name_token = _Token(name_line, store_name.instruction, after=for_token)
        in_token = _Token(name_line, None, ' in ', after=name_token)
        tokens.extend((for_token, name_token, in_token))

        # Chain the tokens of the iterable so that each one is written after the previous one.
        last = in_token
        if self.iter_in:
            iterable_tokens = self.iter_in.tokens
            for iterable_token in iterable_tokens:
                iterable_token.mark_after(last)
                last = iterable_token
            tokens.extend(iterable_tokens)

        tokens.append(_Token(self.i_line, None, ':', after=last))

        self._write_tokens()
249
250
@_register
class _StoreName(_BaseHandler):
    '''
    Implements name = TOS. namei is the index of name in the attribute co_names of the code object.
    The compiler tries to use STORE_FAST or STORE_GLOBAL if possible.
    '''

    opname = "STORE_NAME"

    def _handle(self):
        value = self.stack.pop()

        if isinstance(value, _ForIter):
            # The name is the loop variable: the for handler writes the whole statement.
            value.store_in_name(self)
            return

        if isinstance(value, _MakeFunction) and not value.is_lambda:
            # Nothing is written when a function which is not a lambda is stored.
            return

        value_tokens = value.tokens

        # The assignment starts at the first line used by the value (or by this instruction).
        line = min([self.i_line] + [t.i_line for t in value_tokens])

        name_token = _Token(line, self.instruction)
        equal_token = _Token(line, None, '=', after=name_token)
        self.tokens.extend((name_token, equal_token))

        for value_token in value_tokens:
            value_token.mark_after(equal_token)
        self.tokens.extend(value_tokens)

        self._write_tokens()
282
283
@_register
class _ReturnValue(_BaseHandler):
    """
    Returns with TOS to the caller of the function.
    """

    opname = "RETURN_VALUE"

    def _handle(self):
        value_tokens = self.stack.pop().tokens

        # The `return ` is flagged as an end of line token and all the tokens of the
        # returned value are required to be written after it.
        return_token = _Token(self.i_line, None, 'return ', end_of_line=True)
        for value_token in value_tokens:
            value_token.mark_after(return_token)

        self.tokens.append(return_token)
        self.tokens.extend(value_tokens)

        self._write_tokens()
301
302
@_register
class _CallFunction(_BaseHandler):
    """

    CALL_FUNCTION(argc)

        Calls a callable object with positional arguments. argc indicates the number of positional
        arguments. The top of the stack contains positional arguments, with the right-most argument
        on top. Below the arguments is a callable object to call. CALL_FUNCTION pops all arguments
        and the callable object off the stack, calls the callable object with those arguments, and
        pushes the return value returned by the callable object.

        Changed in version 3.6: This opcode is used only for calls with positional arguments.

    """

    opname = "CALL_FUNCTION"

    def _handle(self):
        # argc arguments + the callable are popped (right-most argument first), so, they're
        # reversed to have the callable followed by the arguments in the order they appear.
        popped = [self.stack.pop() for _i in xrange(self.instruction.argval + 1)]
        popped.reverse()
        name = popped[0]
        args = popped[1:]

        max_line = name.i_line
        self.tokens.extend(name.tokens)

        tok_open_parens = _Token(name.i_line, None, '(', after=name)
        self.tokens.append(tok_open_parens)

        prev = tok_open_parens
        for i, arg in enumerate(args):
            if i > 0:
                # The separator goes between arguments: it's written after the previous
                # argument and before the tokens of the current one.
                comma_token = _Token(prev.i_line, None, ',', after=prev)
                self.tokens.append(comma_token)
                prev = comma_token

            for t in arg.tokens:
                t.mark_after(name)
                t.mark_after(prev)
                max_line = max(max_line, t.i_line)
                self.tokens.append(t)
            prev = arg

        # The call is closed at the last line used by the callable or by any argument.
        tok_close_parens = _Token(max_line, None, ')', after=prev)
        self.tokens.append(tok_close_parens)

        self._write_tokens()

        # The handler represents the result of the call for the handlers which follow.
        self.stack.push(self)
355
356
if IS_PY2:

    @_register
    class _MakeFunctionPy2(_BaseHandler):
        """
        Pushes a new function object on the stack. TOS is the code associated with the function. The
        function object is defined to have argc default parameters, which are found below TOS.
        """

        opname = "MAKE_FUNCTION"

        # _StoreName checks `is_lambda` in any _MakeFunction, so, it must also be available
        # here (lambdas aren't detected by this handler).
        is_lambda = False

        def _handle(self):
            stack = self.stack
            self.code = stack.pop()

            stack.push(self)

    _MakeFunction = _MakeFunctionPy2

else:

    @_register
    class _MakeFunctionPy3(_BaseHandler):
        """
        Pushes a new function object on the stack. From bottom to top, the consumed stack must consist
        of values if the argument carries a specified flag value

            0x01 a tuple of default values for positional-only and positional-or-keyword parameters in positional order

            0x02 a dictionary of keyword-only parameters' default values

            0x04 an annotation dictionary

            0x08 a tuple containing cells for free variables, making a closure

            the code associated with the function (at TOS1)

            the qualified name of the function (at TOS)

        Note: only the 0x01 flag is dealt with and only the positional parameters are written
        in the signature (keyword-only parameters, *args and **kwargs are not written).
        """

        opname = "MAKE_FUNCTION"
        is_lambda = False

        def _handle(self):
            stack = self.stack
            self.qualified_name = stack.pop()
            self.code = stack.pop()

            default_node = None
            if self.instruction.argval & 0x01:
                default_node = stack.pop()

            is_lambda = self.is_lambda = '<lambda>' in [x.tok for x in self.qualified_name.tokens]

            if not is_lambda:
                def_token = _Token(self.i_line, None, 'def ')
                self.tokens.append(def_token)

            for token in self.qualified_name.tokens:
                self.tokens.append(token)
                if not is_lambda:
                    token.mark_after(def_token)
            prev = token

            open_parens_token = _Token(self.i_line, None, '(', after=prev)
            self.tokens.append(open_parens_token)
            prev = open_parens_token

            code = self.code.instruction.argval

            # co_varnames has the names of the arguments followed by the names of the other
            # local variables, so, only the first co_argcount names are positional parameters.
            arg_names = code.co_varnames[:code.co_argcount]

            if default_node:
                # The default values apply to the last positional parameters.
                default_values = list(default_node.instruction.argval)
                defaults = ([_SENTINEL] * (len(arg_names) - len(default_values))) + default_values
            else:
                defaults = [_SENTINEL] * len(arg_names)

            for i, arg in enumerate(arg_names):
                if i > 0:
                    comma_token = _Token(prev.i_line, None, ', ', after=prev)
                    self.tokens.append(comma_token)
                    prev = comma_token

                arg_token = _Token(self.i_line, None, arg, after=prev)
                self.tokens.append(arg_token)

                default = defaults[i]
                if default is not _SENTINEL:
                    eq_token = _Token(default_node.i_line, None, '=', after=prev)
                    self.tokens.append(eq_token)
                    prev = eq_token

                    default_token = _Token(default_node.i_line, None, str(default), after=prev)
                    self.tokens.append(default_token)
                    prev = default_token

            tok_close_parens = _Token(prev.i_line, None, '):', after=prev)
            self.tokens.append(tok_close_parens)

            self._write_tokens()

            stack.push(self)

            # The body of the function is indented from the line after the signature up to
            # the last line which had something written for the code of the function.
            self.writer.indent(prev.i_line + 1)
            body_lines = self.disassembler.merge_code(code)
            # If nothing was written for the body there's no last line: dedent in the same
            # line of the indent so that the indentation is still balanced.
            self.writer.dedent(max(body_lines) if body_lines else prev.i_line + 1)

    _MakeFunction = _MakeFunctionPy3
461
462
def _print_after_info(line_contents, stream=None):
    """
    Writes, for each token, one line with the texts of the tokens it must be written after
    (or a note saying that it has no requisites).

    :param line_contents:
        The tokens to be reported.
    :param stream:
        Where the report is written (sys.stdout if not given).
    """
    if stream is None:
        stream = sys.stdout
    for token in line_contents:
        after_tokens = token.get_after_tokens()
        if after_tokens:
            s = '%s after: %s\n' % (
                repr(token.tok),
                ('"' + '", "'.join(t.tok for t in after_tokens) + '"'))
            stream.write(s)
        else:
            # Each entry is written in its own line (as in the branch above).
            stream.write('%s      (NO REQUISITES)\n' % repr(token.tok))
475
476
def _compose_line_contents(line_contents, previous_line_tokens):
    """
    Computes the text of a line by ordering its tokens according to their "after" requisites.

    A token is only written when all the tokens it must come after were already written (in
    this line or in a previous one). Tokens flagged with `end_of_line` are held back and only
    join the others when no other token can be written.

    :param list line_contents:
        The tokens of the line. Note: the list is changed (written tokens are removed from it).
    :param set previous_line_tokens:
        The tokens written in previous lines (the tokens written here are added to it).
    :return str:
        The texts of the tokens joined in the order they were written.
    """
    lst = []
    handled = set()

    # Hold back the tokens flagged to be written at the end of the line.
    add_to_end_of_line = [token for token in line_contents if token.end_of_line]
    line_contents[:] = [token for token in line_contents if not token.end_of_line]

    # Held back tokens also keep the loop going: otherwise they'd be silently dropped when
    # all the other tokens could be written (or when the line has no other tokens).
    while line_contents or add_to_end_of_line:
        added = False
        delete_indexes = []

        for i, token in enumerate(line_contents):
            for after in token.get_after_tokens():
                if after not in handled and after not in previous_line_tokens:
                    # A requisite still wasn't written: try again in the next pass.
                    break
            else:
                added = True
                previous_line_tokens.add(token)
                handled.add(token)
                lst.append(token.tok)
                delete_indexes.append(i)

        for i in reversed(delete_indexes):
            del line_contents[i]

        if not added:
            if add_to_end_of_line:
                # Nothing else can be written: time to deal with the held back tokens.
                line_contents.extend(add_to_end_of_line)
                del add_to_end_of_line[:]
                continue

            # Something is off, let's just add as is.
            for token in line_contents:
                if token not in handled:
                    lst.append(token.tok)

            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
            stream = StringIO()
            _print_after_info(line_contents, stream)
            pydev_log.critical('Error. After markers are not correct:\n%s', stream.getvalue())
            break
    return ''.join(lst)
530
531
class _PyCodeToSource(object):
    """
    Processes the instructions of a code object with the registered handlers and generates a
    text in which the contents written by the handlers are placed at their line numbers.
    """

    def __init__(self, co, memo=None):
        """
        :param co:
            The code object to be processed.
        :param memo:
            A dict which is passed along to the instances created for nested code objects
            (a new one is created if not given).
        """
        self.memo = {} if memo is None else memo
        self.co = co
        self.instructions = list(_iter_instructions(co))
        self.stack = _Stack()
        self.writer = _Writer()

    def _process_next(self, i_line):
        """
        Removes the next pending instruction and creates the handler registered for it (if
        any) -- the handler deals with the instruction when it's created.
        """
        instruction = self.instructions.pop(0)
        handler_class = _op_name_to_handler.get(instruction.opname)
        if handler_class is None:
            if DEBUG:
                print("UNHANDLED", instruction)
            return

        s = handler_class(i_line, instruction, self.stack, self.writer, self)
        if DEBUG:
            print(s)

    def build_line_to_contents(self):
        """
        Processes all the pending instructions.

        :return dict:
            The mapping of the writer (line number -> list with tokens and indent/dedent markers).
        """
        op_offset_to_line = dict(dis.findlinestarts(self.co))
        curr_line_index = 0

        pending = self.instructions
        while pending:
            # The current line only changes when the next instruction starts a new line.
            line_index = op_offset_to_line.get(pending[0].offset)
            if line_index is not None:
                curr_line_index = line_index

            self._process_next(curr_line_index)
        return self.writer.line_to_contents

    def merge_code(self, code):
        """
        Processes a nested code object and adds what was written for it to the lines of
        the writer of this instance.

        :return list:
            The (sorted) line numbers which had contents in the nested code object.
        """
        if DEBUG:
            print('merge code ----')
        nested = _PyCodeToSource(code, self.memo)
        line_to_contents = nested.build_line_to_contents()
        lines = []
        for line, contents in sorted(dict_iter_items(line_to_contents)):
            lines.append(line)
            self.writer.get_line(line).extend(contents)
        if DEBUG:
            print('end merge code ----')
        return lines

    def disassemble(self):
        """
        :return:
            The text generated for the code object (each written line is placed at its line
            number: lines without contents are left empty).
        """
        from io import StringIO

        show_lines = False
        line_to_contents = self.build_line_to_contents()

        stream = StringIO()
        last_line = 0
        indent = ''
        previous_line_tokens = set()
        for i_line, contents in sorted(dict_iter_items(line_to_contents)):
            # Fill the lines which have no contents so that line numbers are kept.
            for skipped_line in range(last_line + 1, i_line):
                if show_lines:
                    stream.write(u"%s.\n" % (skipped_line,))
                else:
                    stream.write(u"\n")

            line_contents = []
            dedents_found = 0
            for part in contents:
                if part is INDENT_MARKER:
                    # An indent is applied to the line in which it's found.
                    if DEBUG:
                        print('found indent', i_line)
                    indent += '    '
                elif part is DEDENT_MARKER:
                    # A dedent is only applied after the line in which it's found.
                    if DEBUG:
                        print('found dedent', i_line)
                    dedents_found += 1
                else:
                    line_contents.append(part)

            s = indent + _compose_line_contents(line_contents, previous_line_tokens)
            if show_lines:
                stream.write(u"%s. %s\n" % (i_line, s))
            else:
                stream.write(u"%s\n" % s)

            if dedents_found:
                indent = indent[:-(4 * dedents_found)]
            last_line = i_line

        return stream.getvalue()
630
631
def code_obj_to_source(co):
    """
    Generates a source code representation for the given code object (to be used when the
    actual source code is not found).

    This is a work in progress / proof of concept / not ready to be used.

    :param co:
        The code object to be converted.
    :return:
        The text generated for the code object.
    """
    source = _PyCodeToSource(co).disassemble()
    if DEBUG:
        print(source)
    return source
643