1#!/usr/local/bin/python3.8
2#
3# PLASMA : Generate an indented asm code (pseudo-C) with colored syntax.
4# Copyright (C) 2015    Joel
5#
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 3 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program.    If not, see <http://www.gnu.org/licenses/>.
18#
19
20import struct
21from time import time
22
23from plasma.lib.graph import Graph
24from plasma.lib.utils import (unsigned, debug__, BYTES_PRINTABLE_SET,
25                              get_char, print_no_end, warning)
26from plasma.lib.fileformat.binary import Binary, T_BIN_PE, T_BIN_ELF, T_BIN_RAW
27from plasma.lib.colors import (color_addr, color_symbol, color_comment,
28                               color_section, color_string)
29from plasma.lib.exceptions import ExcArch, ExcFileFormat
30from plasma.lib.memory import Memory
31from plasma.lib.consts import *
32
33
class Disassembler():
    """Load a binary and produce disassembly listings and flow graphs.

    The object wraps the loaded binary (`self.binary`), the memory map
    (`self.mem`), a capstone engine (`self.md`) and several containers
    shared with the database object given to the constructor.
    """

    def __init__(self, filename, raw_type, raw_base, raw_big_endian, database):
        """Load `filename` and prepare the capstone engine.

        `raw_type`, `raw_base` and `raw_big_endian` are forwarded to
        `instanciate_binary`. `database` provides the analysis containers;
        when `database.loaded` is false the memory object and the symbols
        are created here and stored back into the database.

        Raises ExcArch if the architecture of the binary is not supported.
        """
        import capstone as CAPSTONE

        arch_lookup = {
            "x86": CAPSTONE.CS_ARCH_X86,
            "x64": CAPSTONE.CS_ARCH_X86,
            "ARM": CAPSTONE.CS_ARCH_ARM,
            "MIPS32": CAPSTONE.CS_ARCH_MIPS,
            "MIPS64": CAPSTONE.CS_ARCH_MIPS,
        }

        mode_lookup = {
            "x86": CAPSTONE.CS_MODE_32,
            "x64": CAPSTONE.CS_MODE_64,
            # A mode constant is expected here, not an architecture one.
            "ARM": CAPSTONE.CS_MODE_ARM,
            "MIPS32": CAPSTONE.CS_MODE_MIPS32,
            "MIPS64": CAPSTONE.CS_MODE_MIPS64,
        }

        word_size_lookup = {
            "x86": 4,
            "x64": 8,
            "ARM": 4,
            "MIPS32": 4,
            "MIPS64": 8,
        }

        self.capstone_inst = {} # capstone instruction cache
        self.db = database

        if database.loaded:
            self.mem = database.mem
        else:
            self.mem = Memory()
            database.mem = self.mem

        self.instanciate_binary(filename, raw_type, raw_base, raw_big_endian)

        arch = self.binary.arch

        if arch not in arch_lookup:
            raise ExcArch(arch)

        self.wordsize = word_size_lookup[arch]
        self.binary.wordsize = self.wordsize

        self.is_mips = arch in ("MIPS32", "MIPS64")
        self.is_x86 = arch in ("x86", "x64")
        # Note: `arch in ("ARM")` would be a substring test on a string.
        self.is_arm = arch == "ARM"
        self.is_big_endian = self.binary.is_big_endian()

        self.binary.load_section_names()

        # Containers shared with the database.
        self.jmptables = database.jmptables
        self.user_inline_comments = database.user_inline_comments
        self.internal_inline_comments = database.internal_inline_comments
        self.user_previous_comments = database.user_previous_comments
        self.internal_previous_comments = database.internal_previous_comments
        self.functions = database.functions
        self.func_id = database.func_id
        self.end_functions = database.end_functions

        self.xrefs = database.xrefs
        self.mem.xrefs = database.xrefs
        self.mem.data_sub_xrefs = database.data_sub_xrefs

        self.mips_gp = database.mips_gp

        if not database.loaded:
            # Fresh database: compute the symbols and store them.
            self.load_symbols()
            database.symbols = self.binary.symbols
            database.reverse_symbols = self.binary.reverse_symbols
            database.demangled = self.binary.demangled
            database.reverse_demangled = self.binary.reverse_demangled
            database.imports = self.binary.imports
        else:
            # Existing database: reuse the saved symbols.
            self.binary.symbols = database.symbols
            self.binary.reverse_symbols = database.reverse_symbols
            self.binary.demangled = database.demangled
            self.binary.reverse_demangled = database.reverse_demangled
            self.binary.imports = database.imports

        cs_arch = arch_lookup[arch]
        cs_mode = mode_lookup[arch]

        if self.is_big_endian:
            cs_mode |= CAPSTONE.CS_MODE_BIG_ENDIAN
        else:
            cs_mode |= CAPSTONE.CS_MODE_LITTLE_ENDIAN

        self.capstone = CAPSTONE
        self.md = CAPSTONE.Cs(cs_arch, cs_mode)
        self.md.detail = True

        for s in self.binary.iter_sections():
            s.big_endian = cs_mode & CAPSTONE.CS_MODE_BIG_ENDIAN


    def instanciate_binary(self, filename, raw_type, raw_base, raw_big_endian):
        """Create `self.binary` for `filename`.

        If `raw_type` is not None the file is loaded as a raw binary.
        Otherwise the format is detected with `get_magic` (ELF or PE).

        Raises ExcFileFormat if the format is not recognized.
        """
        if raw_type is not None:
            import plasma.lib.fileformat.raw as LIB_RAW
            self.binary = LIB_RAW.Raw(filename, raw_type, raw_base, raw_big_endian)
            self.type = T_BIN_RAW
            # The rest of this class reads `self.binary.type`, so set it
            # here as it is done for the other formats.
            self.binary.type = T_BIN_RAW
            return

        start = time()
        ty = self.get_magic(filename)

        if ty == T_BIN_ELF:
            import plasma.lib.fileformat.elf as LIB_ELF
            self.binary = LIB_ELF.ELF(self.db, filename)
        elif ty == T_BIN_PE:
            import plasma.lib.fileformat.pe as LIB_PE
            self.binary = LIB_PE.PE(self.db, filename)
        else:
            raise ExcFileFormat()

        self.binary.type = ty

        elapsed = time() - start
        debug__("Binary loaded in %fs" % elapsed)


    def load_symbols(self):
        """Load static and dynamic symbols, then demangle them.

        If the entry point has no symbol, a new one named "_start" is
        added (or "_start_N" when the name is already used).
        """
        start = time()
        self.binary.load_static_sym()
        self.binary.load_dyn_sym()
        self.binary.demangle_symbols()

        ep = self.binary.get_entry_point()
        if ep not in self.binary.reverse_symbols:
            base = "_start"
            name = base
            i = 0
            # Search a name which is not already taken.
            while name in self.binary.symbols:
                name = "%s_%d" % (base, i)
                i += 1

            self.binary.symbols[name] = ep
            self.binary.reverse_symbols[ep] = name

        elapsed = time() - start
        debug__("Found %d symbols in %fs" % (len(self.binary.symbols), elapsed))


    def get_magic(self, filename):
        """Return T_BIN_ELF or T_BIN_PE from the first bytes of the file.

        Returns None if the magic number is unknown.
        """
        # `with` closes the file even if the read fails.
        with open(filename, "rb") as f:
            magic = f.read(8)

        if magic.startswith(b"\x7fELF"):
            return T_BIN_ELF
        if magic.startswith(b"MZ"):
            return T_BIN_PE
        return None


    # `func_ad` is the function address where the variable `name`
    # is supposed to be.
    def var_get_offset(self, func_ad, name):
        """Return the offset of the variable `name` in the function `func_ad`.

        Returns None if the function is unknown, has no associated object,
        or has no variable with this name.
        """
        if func_ad not in self.functions:
            return None
        func_obj = self.functions[func_ad]
        if func_obj is None:
            return None
        for off, val in func_obj[FUNC_VARS].items():
            if val[VAR_NAME] == name:
                return off
        return None


    def load_arch_module(self):
        """Import and return the plasma.lib.arch module of the binary.

        Raises NotImplementedError for an unsupported architecture.
        """
        arch = self.binary.arch
        if arch in ("x86", "x64"):
            import plasma.lib.arch.x86 as ARCH
        elif arch == "ARM":
            import plasma.lib.arch.arm as ARCH
        elif arch in ("MIPS32", "MIPS64"):
            import plasma.lib.arch.mips as ARCH
        else:
            raise NotImplementedError
        return ARCH


    def dump_xrefs(self, ctx, ad):
        """Return an Output object listing the cross-references to `ad`.

        Each reference is first normalized with `self.mem.get_head_addr`
        and duplicates are printed only once. The way an entry is printed
        depends on the memory type at the reference address.
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.print_labels = False
        xrefs = sorted(ctx.gctx.api.xrefsto(ad))

        seen = set()

        for x in xrefs:
            x = self.mem.get_head_addr(x)

            if x in seen:
                continue

            seen.add(x)
            s = self.binary.get_section(x)

            ty = self.mem.get_type(x)

            # A PE import should not be displayed as a subroutine
            if not(self.binary.type == T_BIN_PE and x in self.binary.imports) \
                   and (ty == MEM_FUNC or ty == MEM_CODE):

                func_id = self.mem.get_func_id(x)
                if func_id != -1:
                    # Print the reference as "function +/- offset".
                    fad = self.func_id[func_id]
                    o._label(fad)
                    diff = x - fad
                    if diff >= 0:
                        o._add(" + %d " % diff)
                    else:
                        o._add(" - %d " % (-diff))

                o._pad_width(20)

                i = self.lazy_disasm(x, s.start)
                o._asm_inst(i)

            elif MEM_WOFFSET <= ty <= MEM_QOFFSET:
                o.set_line(x)
                o._pad_width(20)
                o._address(x)
                sz = self.mem.get_size(x)
                off = s.read_int(x, sz)
                if off is None:
                    # Terminate the line already started, otherwise the
                    # next reference would be appended to it.
                    o._new_line()
                    continue
                o._data_prefix(sz)
                o._add(" ")
                o._imm(off, sz, True, print_data=False, force_dont_print_data=True)
                o._new_line()

            elif ty == MEM_ARRAY:
                o.set_line(x)
                o._pad_width(20)
                o._address(x)
                o._label(x, print_colon=True)
                o._new_line()

            else:
                o._pad_width(20)
                o._address(x)
                o.set_line(x)
                sz = self.mem.get_size_from_type(ty)
                o._word(s.read_int(x, sz), sz)
                o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o


    def is_label(self, ad):
        """Return True if `ad` has a symbol or is the target of an xref."""
        return ad in self.db.reverse_symbols or ad in self.xrefs


    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """Return an Output object containing a linear dump from `ctx.entry`.

        If `until` is -1, the dump stops after `lines` lines. Otherwise it
        stops when the address `until` is reached. The dump continues over
        consecutive sections.

        Returns None if no section can be found for the start address.
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        ARCH_UTILS = ARCH.utils

        ad = ctx.entry
        s = self.binary.get_section(ad)

        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.curr_section = s
        o.mode_dump = True
        l = 0
        api = ctx.gctx.api

        # For mips: after a jump we add a newline, but for mips we should
        # add this newline after the prefetch instruction.
        prefetch_after_branch = False

        while 1:
            if ad == s.start:
                # Print a header at the beginning of each section.
                if not o.last_2_lines_are_empty():
                    o._new_line()
                o._dash()
                o._section(s.name)
                o._add("  0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()

            while ((l < lines and until == -1) or (ad < until and until != -1)) \
                    and ad <= s.end:

                ty = self.mem.get_type(ad)

                # A PE import should not be displayed as a subroutine
                if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                        and self.mem.is_code(ad):

                    is_func = ad in self.functions

                    if is_func:
                        if not o.last_2_lines_are_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()

                    i = self.lazy_disasm(ad, s.start)

                    if not is_func and ad in self.xrefs and \
                            not o.last_2_lines_are_empty():
                        o._new_line()

                    o._asm_inst(i)

                    is_end = ad in self.end_functions

                    # mips
                    if prefetch_after_branch:
                        prefetch_after_branch = False
                        if not is_end:
                            o._new_line()

                    if is_end:
                        for fad in self.end_functions[ad]:
                            sy = api.get_symbol(fad)
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()

                    elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                        if self.is_mips:
                            prefetch_after_branch = True
                        else:
                            o._new_line()

                    elif ARCH_UTILS.is_call(i):
                        op = i.operands[0]
                        if op.type == self.capstone.CS_OP_IMM:
                            imm = unsigned(op.value.imm)
                            if imm in self.functions and self.is_noreturn(imm):
                                if self.is_mips:
                                    prefetch_after_branch = True
                                else:
                                    o._new_line()

                    ad += i.size

                elif MEM_WOFFSET <= ty <= MEM_QOFFSET:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    off = s.read_int(ad, sz)
                    if off is None:
                        # The value can't be read: end the current line and
                        # step over the entry. Without moving `ad` this
                        # loop would never terminate.
                        o._new_line()
                        ad += sz
                        l += 1
                        continue
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._data_prefix(sz)
                    o._add(" ")
                    o._imm(off, sz, True, print_data=False, force_dont_print_data=True)
                    o._new_line()
                    ad += sz

                elif ty == MEM_ASCII:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    buf = self.binary.get_string(ad, sz)

                    if buf is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))

                        # Split the string into multi lines

                        splitted = buf.split("\n")

                        # `j` is the offset in the string of the next
                        # chunk to print.
                        j = 0
                        for idx, st in enumerate(splitted):
                            if idx > 0 and len(st) != 0:
                                o._new_line()
                                o.set_line(ad + j)
                                o._address(ad + j)

                            # Print `st` by chunks of `bs` characters.
                            ibs = 0
                            bs = 65
                            while ibs < len(st):
                                if ibs > 0:
                                    o._new_line()
                                    o.set_line(ad + j)
                                    o._address(ad + j)

                                blk = st[ibs:ibs + bs]

                                # The last chunk of a part followed by a
                                # newline gets a visible "\n".
                                if idx < len(splitted) - 1 and ibs + bs >= len(st):
                                    o._string('"' + blk + '\\n"')
                                    j += len(blk) + 1
                                else:
                                    o._string('"' + blk + '"')
                                    j += len(blk)

                                ibs += bs

                    o._add(", 0")
                    o._new_line()
                    ad += sz

                elif ty == MEM_ARRAY:
                    prefetch_after_branch = False
                    o._label_and_address(ad)

                    array_info = self.mem.mm[ad]
                    total_size = array_info[0]
                    entry_type = array_info[2]
                    entry_size = self.mem.get_size_from_type(entry_type)

                    o.set_line(ad)
                    o._data_prefix(entry_size)

                    k = 0
                    while k < total_size:
                        # Wrap the array on a new line when it's too long.
                        if o.curr_index > 70:
                            o._new_line()
                            o.set_line(ad)
                            o._address(ad)
                            o._data_prefix(entry_size)
                            l += 1

                        val = s.read_int(ad, entry_size)
                        if MEM_WOFFSET <= entry_type <= MEM_QOFFSET:
                            o._add(" ")
                            o._imm(val, entry_size, True,
                                   print_data=False, force_dont_print_data=True)
                        else:
                            o._word(val, entry_size, is_from_array=True)

                        ad += entry_size
                        k += entry_size

                        if k < total_size:
                            o._add(",")

                    o._new_line()

                else:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size_from_type(ty)
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._word(s.read_int(ad, sz), sz)
                    o._new_line()
                    ad += sz

                l += 1

            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutives !
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                o._new_line()
                ad = s.start
                if until != -1 and ad >= until:
                    break

            if (l >= lines and until == -1) or (ad >= until and until != -1):
                break

            o.curr_section = s

        if until == ad:
            if self.mem.is_code(ad) and ad in self.xrefs or \
                    s is not None and ad == s.start:
                if not o.last_2_lines_are_empty():
                    o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o


    def hexdump(self, ctx, lines):
        """Print at most `lines` full rows of 16 bytes from `ctx.entry`.

        Each row contains the address, the bytes in hexadecimal and their
        printable representation. A trailing incomplete row is printed
        too. Nothing is printed if the address is not in a section.
        """
        MAX_NB_BYTES = 16

        def print_line(ad, line):
            if not line:
                return

            print_no_end(color_addr(ad))

            for by in line:
                print_no_end("%.2x " % by)

            # Keep the text column aligned on an incomplete row.
            if len(line) != MAX_NB_BYTES:
                print_no_end("   " * (MAX_NB_BYTES - len(line)))

            print_no_end("| ")

            for by in line:
                if by in BYTES_PRINTABLE_SET and by != 13 and by != 9 and by != 10:
                    print_no_end("%c" % by)
                else:
                    print_no_end(".")

            print()

        ad = ctx.entry
        s = self.binary.get_section(ad)
        if s is None:
            return

        off = ad - s.start
        l = 0
        buf = []
        first_ad = ad

        while off < s.real_size and l < lines:
            buf.append(s.data[off])
            if len(buf) == MAX_NB_BYTES:
                l += 1
                print_line(first_ad, buf)
                buf.clear()
                # The next row starts at the byte after the current one.
                first_ad = s.start + off + 1

            off += 1

        print_line(first_ad, buf)


    def print_functions(self, api):
        """Print all known functions sorted by address, then the total."""
        total = 0

        # TODO: race condition with the analyzer ?
        for ad in sorted(self.functions):
            print_no_end(color_addr(ad))
            sy = api.get_symbol(ad)

            if ad in self.db.reverse_demangled:
                print_no_end(" %s (%s) " % (self.db.reverse_demangled[ad],
                                           color_comment(sy)))
            else:
                print_no_end(" " + sy)
            print()

            total += 1

        print("Total:", total)

    #
    # sym_filter : search a symbol, non case-sensitive
    #    if it starts with '-', it prints non-matching symbols
    #
    def print_symbols(self, sym_filter=None):
        """Print the symbols selected by `sym_filter`, then the total.

        With `sym_filter` set to None every symbol is printed. Symbols
        with an empty name are never printed.
        """
        invert_match = False

        if sym_filter is not None:
            sym_filter = sym_filter.lower()
            # startswith is safe on an empty filter (sym_filter[0] is not).
            if sym_filter.startswith("-"):
                invert_match = True
                sym_filter = sym_filter[1:]

        def is_selected(text):
            # True if the filter is found, or not found in inverted mode.
            return (sym_filter in text.lower()) != invert_match

        total = 0

        # TODO: race condition with the analyzer ?
        for sy in list(self.db.symbols):
            ad = self.db.symbols[sy]

            if ad in self.db.reverse_demangled:
                dem = self.db.reverse_demangled[ad]
            else:
                dem = None

            # The filter is tested on the name and on the demangled name.
            if sym_filter is None or is_selected(sy) or \
                    (dem is not None and is_selected(dem)):

                if sy:
                    print_no_end(color_addr(ad))

                    if dem is not None:
                        print_no_end(" %s (%s) " % (dem, color_comment(sy)))
                    else:
                        print_no_end(" " + sy)

                    print()
                    total += 1

        print("Total:", total)


    def lazy_disasm(self, ad, stay_in_section=-1, s=None):
        """Return the capstone instruction at `ad`, using a cache.

        Returns None if `ad` is not in a section or if nothing can be
        disassembled at this address. On a cache miss, a block of bytes
        is disassembled and every decoded instruction is cached.

        `stay_in_section` and `s` are currently ignored.
        """
        s = self.binary.get_section(ad)
        if s is None:
            return None

        # if stay_in_section != -1 and s.start != stay_in_section:
            # return None, s

        if ad in self.capstone_inst:
            return self.capstone_inst[ad]

        # TODO: remove when it's too big ?
        if len(self.capstone_inst) > CAPSTONE_CACHE_SIZE:
            self.capstone_inst.clear()

        # Disassemble by block of N bytes
        N = 128
        d = s.read(ad, N)
        gen = self.md.disasm(d, ad)

        try:
            first = next(gen)
        except StopIteration:
            return None

        self.capstone_inst[first.address] = first
        for i in gen:
            # Stop as soon as we reach an already cached instruction.
            if i.address in self.capstone_inst:
                break
            self.capstone_inst[i.address] = i

        return first


    def __add_prefetch(self, addr_set, inst):
        """On mips, return the instruction which follows `inst`.

        Its address is added to `addr_set`. Returns None on other
        architectures or if the next instruction can't be disassembled.
        """
        if not self.is_mips:
            return None

        prefetch = self.lazy_disasm(inst.address + inst.size)
        # lazy_disasm may fail, don't dereference None.
        if prefetch is not None:
            addr_set.add(prefetch.address)
        return prefetch


    def is_noreturn(self, ad):
        """Return a true value if the function at `ad` is flagged noreturn.

        `ad` must be a key of `self.functions`. Returns False if the
        function has no associated object.
        """
        func_obj = self.functions[ad]
        if func_obj is None:
            return False
        return func_obj[FUNC_FLAGS] & FUNC_FLAG_NORETURN


    # Generate a flow graph of the given function (addr)
    def get_graph(self, entry):
        """Build the flow graph reachable from `entry`.

        Returns a tuple (graph, nb_new_syms). `nb_new_syms` is the value
        returned by `reverse_stripped_list` for a PE and 0 otherwise.
        Returns (None, 0) if the graph has no node.
        """
        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry)
        stack = [entry]
        start = time()
        prefetch = None
        addresses = set()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.

        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            addresses.add(ad)

            if ARCH_UTILS.is_ret(inst):
                prefetch = self.__add_prefetch(addresses, inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == self.capstone.CS_OP_IMM:
                    nxt = unsigned(op.value.imm)

                    # Don't follow a jump to another function or to data.
                    if nxt in self.functions or self.db.mem.is_data(nxt):
                        gph.new_node(inst, prefetch, None)
                    else:
                        stack.append(nxt)
                        gph.new_node(inst, prefetch, [nxt])

                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == self.capstone.CS_OP_IMM:
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    nxt_jmp = unsigned(op.value.imm)

                    # is_d1/is_d2: the target must not be followed
                    # (it's a function or data).
                    is_d1 = direct_nxt in self.functions or self.db.mem.is_data(direct_nxt)
                    is_d2 = nxt_jmp in self.functions or self.db.mem.is_data(nxt_jmp)

                    if is_d1 and is_d2:
                        gph.new_node(inst, prefetch, None)
                    elif not is_d1 and not is_d2:
                        # `not (is_d1 and is_d2)` would always be true
                        # here and the two next cases never reached.
                        stack.append(direct_nxt)
                        stack.append(nxt_jmp)
                        gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                    elif is_d1:
                        stack.append(nxt_jmp)
                        gph.new_node(inst, prefetch, [nxt_jmp])
                    else:
                        stack.append(direct_nxt)
                        gph.new_node(inst, prefetch, [direct_nxt])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            else:
                if ad != entry and ARCH_UTILS.is_call(inst):
                    # TODO: like in the analyzer, simulate registers
                    # -> during the analysis, save in the database
                    # the immediate value.
                    op = inst.operands[0]
                    if op.type == self.capstone.CS_OP_IMM:
                        imm = unsigned(op.value.imm)
                        if imm in self.functions and self.is_noreturn(imm):
                            # The flow stops after a call to a noreturn
                            # function.
                            prefetch = self.__add_prefetch(addresses, inst)
                            gph.new_node(inst, prefetch, None)
                            gph.exit_or_ret.add(ad)
                            continue

                    if op.type == self.capstone.CS_OP_MEM:
                        if ad in self.db.immediates and \
                            self.binary.imports[self.db.immediates[ad]] & FUNC_FLAG_NORETURN:
                            prefetch = self.__add_prefetch(addresses, inst)
                            gph.new_node(inst, prefetch, None)
                            gph.exit_or_ret.add(ad)
                            continue

                nxt = inst.address + inst.size

                if nxt in self.functions or self.db.mem.is_data(nxt):
                    gph.new_node(inst, None, None)
                else:
                    stack.append(nxt)
                    gph.new_node(inst, None, [nxt])

        if len(gph.nodes) == 0:
            return None, 0

        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.reverse_stripped_list(self, addresses)
        else:
            nb_new_syms = 0

        elapsed = time() - start
        debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

        return gph, nb_new_syms