#!/usr/local/bin/python3.8
#
# PLASMA : Generate an indented asm code (pseudo-C) with colored syntax.
# Copyright (C) 2015 Joel
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import struct
from time import time

from plasma.lib.graph import Graph
from plasma.lib.utils import (unsigned, debug__, BYTES_PRINTABLE_SET,
                              get_char, print_no_end, warning)
from plasma.lib.fileformat.binary import Binary, T_BIN_PE, T_BIN_ELF, T_BIN_RAW
from plasma.lib.colors import (color_addr, color_symbol, color_comment,
                               color_section, color_string)
from plasma.lib.exceptions import ExcArch, ExcFileFormat
from plasma.lib.memory import Memory
from plasma.lib.consts import *


class Disassembler():
    """Load a binary and produce listings and flow graphs from it.

    The object wraps a file-format loader (`self.binary`), a capstone
    disassembler (`self.md`) and the shared `database` object from which
    most of the analysis state (functions, xrefs, comments, ...) is
    aliased.
    """

    def __init__(self, filename, raw_type, raw_base, raw_big_endian, database):
        """Open `filename` and set up capstone for its architecture.

        `raw_type`, `raw_base` and `raw_big_endian` are forwarded to the
        raw loader; a `raw_type` of None selects ELF/PE detection by
        magic number. `database` supplies the persistent state; when
        `database.loaded` is false the symbols are loaded from the binary
        and stored into it.

        Raises ExcArch when the architecture is not supported and
        ExcFileFormat when the file format is not recognized.
        """
        import capstone as CAPSTONE

        arch_lookup = {
            "x86": CAPSTONE.CS_ARCH_X86,
            "x64": CAPSTONE.CS_ARCH_X86,
            "ARM": CAPSTONE.CS_ARCH_ARM,
            "MIPS32": CAPSTONE.CS_ARCH_MIPS,
            "MIPS64": CAPSTONE.CS_ARCH_MIPS,
        }

        mode_lookup = {
            "x86": CAPSTONE.CS_MODE_32,
            "x64": CAPSTONE.CS_MODE_64,
            # This is a *mode* table: use the mode constant, not the
            # architecture one.
            "ARM": CAPSTONE.CS_MODE_ARM,
            "MIPS32": CAPSTONE.CS_MODE_MIPS32,
            "MIPS64": CAPSTONE.CS_MODE_MIPS64,
        }

        word_size_lookup = {
            "x86": 4,
            "x64": 8,
            "ARM": 4,
            "MIPS32": 4,
            "MIPS64": 8,
        }

        self.capstone_inst = {}  # capstone instruction cache
        self.db = database

        if database.loaded:
            self.mem = database.mem
        else:
            self.mem = Memory()
            database.mem = self.mem

        self.instanciate_binary(filename, raw_type, raw_base, raw_big_endian)

        arch = self.binary.arch
        if arch not in ("x86", "x64", "MIPS32", "MIPS64", "ARM"):
            raise ExcArch(arch)

        self.wordsize = word_size_lookup[arch]
        self.binary.wordsize = self.wordsize

        self.is_mips = arch in ("MIPS32", "MIPS64")
        self.is_x86 = arch in ("x86", "x64")
        # A plain comparison: `in ("ARM")` would be a substring test on a
        # string, not a tuple membership test.
        self.is_arm = arch == "ARM"
        self.is_big_endian = self.binary.is_big_endian()

        self.binary.load_section_names()

        # Aliases on the database content.
        self.jmptables = database.jmptables
        self.user_inline_comments = database.user_inline_comments
        self.internal_inline_comments = database.internal_inline_comments
        self.user_previous_comments = database.user_previous_comments
        self.internal_previous_comments = database.internal_previous_comments
        self.functions = database.functions
        self.func_id = database.func_id
        self.end_functions = database.end_functions

        self.xrefs = database.xrefs
        self.mem.xrefs = database.xrefs
        self.mem.data_sub_xrefs = database.data_sub_xrefs

        self.mips_gp = database.mips_gp

        if not database.loaded:
            self.load_symbols()
            database.symbols = self.binary.symbols
            database.reverse_symbols = self.binary.reverse_symbols
            database.demangled = self.binary.demangled
            database.reverse_demangled = self.binary.reverse_demangled
            database.imports = self.binary.imports
        else:
            self.binary.symbols = database.symbols
            self.binary.reverse_symbols = database.reverse_symbols
            self.binary.demangled = database.demangled
            self.binary.reverse_demangled = database.reverse_demangled
            self.binary.imports = database.imports

        cs_arch = arch_lookup[arch]
        cs_mode = mode_lookup[arch]

        if self.is_big_endian:
            cs_mode |= CAPSTONE.CS_MODE_BIG_ENDIAN
        else:
            cs_mode |= CAPSTONE.CS_MODE_LITTLE_ENDIAN

        self.capstone = CAPSTONE
        self.md = CAPSTONE.Cs(cs_arch, cs_mode)
        self.md.detail = True

        for s in self.binary.iter_sections():
            s.big_endian = cs_mode & CAPSTONE.CS_MODE_BIG_ENDIAN


    def instanciate_binary(self, filename, raw_type, raw_base, raw_big_endian):
        """Create `self.binary` with the loader matching the file.

        When `raw_type` is not None the raw loader is used, otherwise the
        format is detected with get_magic(). Raises ExcFileFormat when the
        magic number is neither ELF nor PE.
        """
        if raw_type is not None:
            import plasma.lib.fileformat.raw as LIB_RAW
            self.binary = LIB_RAW.Raw(filename, raw_type, raw_base, raw_big_endian)
            self.type = T_BIN_RAW
            # The rest of this class reads `self.binary.type`, so record
            # the type there too, as it is done for ELF and PE below.
            self.binary.type = T_BIN_RAW
            return

        start = time()
        ty = self.get_magic(filename)

        if ty == T_BIN_ELF:
            import plasma.lib.fileformat.elf as LIB_ELF
            self.binary = LIB_ELF.ELF(self.db, filename)
        elif ty == T_BIN_PE:
            import plasma.lib.fileformat.pe as LIB_PE
            self.binary = LIB_PE.PE(self.db, filename)
        else:
            raise ExcFileFormat()

        self.binary.type = ty

        elapsed = time() - start
        debug__("Binary loaded in %fs" % elapsed)


    def load_symbols(self):
        """Load static and dynamic symbols, then demangle them.

        If the entry point has no symbol, a "_start" symbol is created
        for it (suffixed with "_<n>" when the name is already taken).
        """
        start = time()
        self.binary.load_static_sym()
        self.binary.load_dyn_sym()
        self.binary.demangle_symbols()

        ep = self.binary.get_entry_point()
        if ep not in self.binary.reverse_symbols:
            base_name = "_start"
            name = base_name
            i = 0
            while name in self.binary.symbols:
                name = "%s_%d" % (base_name, i)
                i += 1

            self.binary.symbols[name] = ep
            self.binary.reverse_symbols[ep] = name

        elapsed = time() - start
        debug__("Found %d symbols in %fs" % (len(self.binary.symbols), elapsed))


    def get_magic(self, filename):
        """Return T_BIN_ELF or T_BIN_PE from the first bytes of the file.

        Returns None when the magic number is not recognized.
        """
        # `with` guarantees the handle is closed even if read() fails.
        with open(filename, "rb") as f:
            magic = f.read(8)

        if magic.startswith(b"\x7fELF"):
            return T_BIN_ELF
        if magic.startswith(b"MZ"):
            return T_BIN_PE
        return None


    def var_get_offset(self, func_ad, name):
        """Return the offset of the variable `name`, or None if not found.

        `func_ad` is the function address where the variable `name`
        is supposed to be.
        """
        if func_ad not in self.functions:
            return None
        func_obj = self.functions[func_ad]
        if func_obj is None:
            return None
        for off, val in func_obj[FUNC_VARS].items():
            if val[VAR_NAME] == name:
                return off
        return None


    def load_arch_module(self):
        """Import and return the plasma.lib.arch module of the binary.

        Raises NotImplementedError for an unknown architecture.
        """
        if self.binary.arch in ("x86", "x64"):
            import plasma.lib.arch.x86 as ARCH
        elif self.binary.arch == "ARM":
            import plasma.lib.arch.arm as ARCH
        elif self.binary.arch in ("MIPS32", "MIPS64"):
            import plasma.lib.arch.mips as ARCH
        else:
            raise NotImplementedError
        return ARCH


    def dump_xrefs(self, ctx, ad):
        """Return an Output object listing every cross-reference to `ad`.

        Each xref is reduced to its head address (so several references
        inside the same item are shown once) and printed according to the
        memory type found at that address.
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.print_labels = False
        xrefs = sorted(ctx.gctx.api.xrefsto(ad))

        seen = set()

        for x in xrefs:
            x = self.mem.get_head_addr(x)

            if x in seen:
                continue

            seen.add(x)
            s = self.binary.get_section(x)

            ty = self.mem.get_type(x)

            # A PE import should not be displayed as a subroutine
            is_pe_import = (self.binary.type == T_BIN_PE and
                            x in self.binary.imports)

            if not is_pe_import and (ty == MEM_FUNC or ty == MEM_CODE):
                func_id = self.mem.get_func_id(x)
                if func_id != -1:
                    fad = self.func_id[func_id]
                    o._label(fad)
                    diff = x - fad
                    if diff >= 0:
                        o._add(" + %d " % diff)
                    else:
                        o._add(" - %d " % (-diff))

                o._pad_width(20)

                i = self.lazy_disasm(x, s.start)
                o._asm_inst(i)

            elif MEM_WOFFSET <= ty <= MEM_QOFFSET:
                o.set_line(x)
                o._pad_width(20)
                o._address(x)
                sz = self.mem.get_size(x)
                off = s.read_int(x, sz)
                if off is not None:
                    o._data_prefix(sz)
                    o._add(" ")
                    o._imm(off, sz, True, print_data=False,
                           force_dont_print_data=True)
                # Always terminate the line, otherwise the next xref would
                # be appended to a half-written one.
                o._new_line()

            elif ty == MEM_ARRAY:
                o.set_line(x)
                o._pad_width(20)
                o._address(x)
                o._label(x, print_colon=True)
                o._new_line()

            else:
                o._pad_width(20)
                o._address(x)
                o.set_line(x)
                sz = self.mem.get_size_from_type(ty)
                o._word(s.read_int(x, sz), sz)
                o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o


    def is_label(self, ad):
        """Return True if `ad` has a symbol or is the target of an xref."""
        return ad in self.db.reverse_symbols or ad in self.xrefs


    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """Return an Output object with a linear listing from `ctx.entry`.

        The listing stops after `lines` lines when `until` is -1,
        otherwise when the address `until` is reached. Returns None when
        there is nothing to print (no section at or after the entry).
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        ARCH_UTILS = ARCH.utils

        ad = ctx.entry
        s = self.binary.get_section(ad)

        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutive!
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.curr_section = s
        o.mode_dump = True
        nb_lines = 0
        api = ctx.gctx.api

        # For mips: after a jump we add a newline, but for mips we should
        # add this newline after the prefetch instruction.
        prefetch_after_branch = False

        while 1:
            if ad == s.start:
                # Section header.
                if not o.last_2_lines_are_empty():
                    o._new_line()
                o._dash()
                o._section(s.name)
                o._add(" 0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()

            while (((nb_lines < lines and until == -1) or
                    (ad < until and until != -1)) and
                   ad <= s.end):

                ty = self.mem.get_type(ad)

                # A PE import should not be displayed as a subroutine
                is_pe_import = (self.binary.type == T_BIN_PE and
                                ad in self.binary.imports)

                if not is_pe_import and self.mem.is_code(ad):
                    is_func = ad in self.functions

                    if is_func:
                        if not o.last_2_lines_are_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()

                    i = self.lazy_disasm(ad, s.start)

                    if not is_func and ad in self.xrefs and \
                            not o.last_2_lines_are_empty():
                        o._new_line()

                    o._asm_inst(i)

                    is_end = ad in self.end_functions

                    # mips
                    if prefetch_after_branch:
                        prefetch_after_branch = False
                        if not is_end:
                            o._new_line()

                    if is_end:
                        for fad in self.end_functions[ad]:
                            sy = api.get_symbol(fad)
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()

                    elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                        if self.is_mips:
                            prefetch_after_branch = True
                        else:
                            o._new_line()

                    elif ARCH_UTILS.is_call(i):
                        op = i.operands[0]
                        if op.type == self.capstone.CS_OP_IMM:
                            imm = unsigned(op.value.imm)
                            if imm in self.functions and self.is_noreturn(imm):
                                if self.is_mips:
                                    prefetch_after_branch = True
                                else:
                                    o._new_line()

                    ad += i.size

                elif MEM_WOFFSET <= ty <= MEM_QOFFSET:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    off = s.read_int(ad, sz)
                    if off is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))
                        o._data_prefix(sz)
                        o._add(" ")
                        o._imm(off, sz, True, print_data=False,
                               force_dont_print_data=True)
                    # Even when the value can't be read, close the line and
                    # move forward: a bare `continue` here would never
                    # advance `ad` and the loop would spin forever.
                    o._new_line()
                    ad += sz

                elif ty == MEM_ASCII:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    buf = self.binary.get_string(ad, sz)

                    if buf is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))

                        # Split the string into multi lines

                        splitted = buf.split("\n")

                        # `j` is the offset, from `ad`, of the piece of
                        # string being printed.
                        j = 0
                        for idx, st in enumerate(splitted):
                            if idx > 0 and len(st) != 0:
                                o._new_line()
                                o.set_line(ad + j)
                                o._address(ad + j)

                            ibs = 0
                            bs = 65  # max number of characters per line
                            while ibs < len(st):
                                if ibs > 0:
                                    o._new_line()
                                    o.set_line(ad + j)
                                    o._address(ad + j)

                                blk = st[ibs:ibs + bs]

                                if idx < len(splitted) - 1 and ibs + bs >= len(st):
                                    # Last block of a line which was
                                    # terminated by "\n" in the string.
                                    o._string('"' + blk + '\\n"')
                                    j += len(blk) + 1
                                else:
                                    o._string('"' + blk + '"')
                                    j += len(blk)

                                ibs += bs

                    o._add(", 0")
                    o._new_line()
                    ad += sz

                elif ty == MEM_ARRAY:
                    prefetch_after_branch = False
                    o._label_and_address(ad)

                    array_info = self.mem.mm[ad]
                    total_size = array_info[0]
                    entry_type = array_info[2]
                    entry_size = self.mem.get_size_from_type(entry_type)

                    o.set_line(ad)
                    o._data_prefix(entry_size)

                    k = 0
                    while k < total_size:
                        # Wrap long arrays on several output lines.
                        if o.curr_index > 70:
                            o._new_line()
                            o.set_line(ad)
                            o._address(ad)
                            o._data_prefix(entry_size)
                            nb_lines += 1

                        val = s.read_int(ad, entry_size)
                        if MEM_WOFFSET <= entry_type <= MEM_QOFFSET:
                            o._add(" ")
                            o._imm(val, entry_size, True,
                                   print_data=False, force_dont_print_data=True)
                        else:
                            o._word(val, entry_size, is_from_array=True)

                        ad += entry_size
                        k += entry_size

                        if k < total_size:
                            o._add(",")

                    o._new_line()

                else:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size_from_type(ty)
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._word(s.read_int(ad, sz), sz)
                    o._new_line()
                    ad += sz

                nb_lines += 1

            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutive!
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                o._new_line()
                ad = s.start
                if until != -1 and ad >= until:
                    break

            if (nb_lines >= lines and until == -1) or \
                    (ad >= until and until != -1):
                break

            o.curr_section = s

        if until == ad:
            if (self.mem.is_code(ad) and ad in self.xrefs) or \
                    (s is not None and ad == s.start):
                if not o.last_2_lines_are_empty():
                    o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o


    def hexdump(self, ctx, lines):
        """Print at most `lines` full rows of 16 bytes from `ctx.entry`.

        Each row shows the address of its first byte, the bytes in hex
        and their printable representation. Nothing is printed when the
        entry address is not inside a section.
        """
        MAX_NB_BYTES = 16

        def print_line(ad, line):
            if not line:
                return

            print_no_end(color_addr(ad))

            for by in line:
                print_no_end("%.2x " % by)

            if len(line) != MAX_NB_BYTES:
                # Every byte takes 3 characters ("%.2x "): pad a short row
                # with as much so the ascii column stays aligned.
                print_no_end("   " * (MAX_NB_BYTES - len(line)))

            print_no_end("| ")

            for by in line:
                if by in BYTES_PRINTABLE_SET and by != 13 and by != 9 and by != 10:
                    print_no_end("%c" % by)
                else:
                    print_no_end(".")

            print()

        ad = ctx.entry
        s = self.binary.get_section(ad)
        if s is None:
            return

        off = ad - s.start
        nb_lines = 0
        buf = []
        first_ad = ad

        while off < s.real_size and nb_lines < lines:
            buf.append(s.data[off])
            # Advance before computing the address of the next row,
            # otherwise every row after the first one is labelled one
            # byte too low.
            off += 1
            if len(buf) == MAX_NB_BYTES:
                nb_lines += 1
                print_line(first_ad, buf)
                buf.clear()
                first_ad = s.start + off

        # Remaining bytes (incomplete row at the end of the section).
        print_line(first_ad, buf)


    def print_functions(self, api):
        """Print every known function, sorted by address, then the total.

        `api` is used to resolve the symbol of each function address.
        """
        total = 0

        # TODO: race condition with the analyzer ?
        for ad in sorted(self.functions):
            print_no_end(color_addr(ad))
            sy = api.get_symbol(ad)

            if ad in self.db.reverse_demangled:
                print_no_end(" %s (%s) " % (self.db.reverse_demangled[ad],
                                            color_comment(sy)))
            else:
                print_no_end(" " + sy)
            print()

            total += 1

        print("Total:", total)


    #
    # sym_filter : search a symbol, non case-sensitive
    #    if it starts with '-', it prints non-matching symbols
    #
    def print_symbols(self, sym_filter=None):
        """Print the symbols matching `sym_filter`, then the total.

        The filter is compared, case-insensitively, against the symbol
        name and its demangled name (if any). With None every symbol is
        printed. A leading '-' inverts the match.
        """
        invert_match = False

        if sym_filter is not None:
            sym_filter = sym_filter.lower()
            # startswith() is safe on an empty filter, `[0]` is not.
            if sym_filter.startswith("-"):
                invert_match = True
                sym_filter = sym_filter[1:]

        def matches(text):
            # True if `text` contains the filter, or doesn't when the
            # match is inverted.
            return (sym_filter in text.lower()) != invert_match

        total = 0

        # TODO: race condition with the analyzer ?
        for sy in list(self.db.symbols):
            ad = self.db.symbols[sy]

            if ad in self.db.reverse_demangled:
                dem = self.db.reverse_demangled[ad]
            else:
                dem = None

            if sym_filter is None or matches(sy) or \
                    (dem is not None and matches(dem)):

                if sy:
                    print_no_end(color_addr(ad))

                    if dem is not None:
                        print_no_end(" %s (%s) " % (dem, color_comment(sy)))
                    else:
                        print_no_end(" " + sy)

                    print()
                    total += 1

        print("Total:", total)


    def lazy_disasm(self, ad, stay_in_section=-1, s=None):
        """Return the capstone instruction at `ad`, or None.

        None is returned when `ad` is not inside a section or when
        nothing can be decoded there. Instructions are decoded by blocks
        and kept in `self.capstone_inst`.

        NOTE: `stay_in_section` and `s` are kept for the callers but are
        currently ignored: the section is always looked up from `ad`.
        """
        s = self.binary.get_section(ad)
        if s is None:
            return None

        if ad in self.capstone_inst:
            return self.capstone_inst[ad]

        # TODO: remove when it's too big ?
        if len(self.capstone_inst) > CAPSTONE_CACHE_SIZE:
            self.capstone_inst.clear()

        # Disassemble by block of N bytes
        N = 128
        d = s.read(ad, N)
        gen = self.md.disasm(d, ad)

        try:
            first = next(gen)
        except StopIteration:
            return None

        self.capstone_inst[first.address] = first
        for i in gen:
            # Stop as soon as we reach something already decoded.
            if i.address in self.capstone_inst:
                break
            self.capstone_inst[i.address] = i

        return first


    def __add_prefetch(self, addr_set, inst):
        """On mips, return the instruction following `inst`.

        Its address is also added to `addr_set`. Returns None on other
        architectures, or when that instruction can't be disassembled.
        """
        if not self.is_mips:
            return None

        prefetch = self.lazy_disasm(inst.address + inst.size)
        if prefetch is None:
            # Nothing decodable after the branch (end of section, bad
            # bytes): callers already handle a missing prefetch.
            return None

        addr_set.add(prefetch.address)
        return prefetch


    def is_noreturn(self, ad):
        """Return the FUNC_FLAG_NORETURN bit of the function at `ad`.

        The result is falsy when the flag is not set or when the function
        object is None. `ad` must be a key of `self.functions`.
        """
        func_obj = self.functions[ad]
        if func_obj is None:
            return False
        return func_obj[FUNC_FLAGS] & FUNC_FLAG_NORETURN


    # Generate a flow graph of the given function (addr)
    def get_graph(self, entry):
        """Build the flow graph of the function starting at `entry`.

        Returns a tuple (graph, nb_new_syms), or (None, 0) when no
        instruction could be disassembled. `nb_new_syms` is the value
        returned by `reverse_stripped_list` for PE binaries and 0
        otherwise.
        """
        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry)
        stack = [entry]
        start = time()
        prefetch = None
        addresses = set()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.

        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            addresses.add(ad)

            if ARCH_UTILS.is_ret(inst):
                prefetch = self.__add_prefetch(addresses, inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == self.capstone.CS_OP_IMM:
                    nxt = unsigned(op.value.imm)

                    if nxt in self.functions or self.db.mem.is_data(nxt):
                        gph.new_node(inst, prefetch, None)
                    else:
                        stack.append(nxt)
                        gph.new_node(inst, prefetch, [nxt])

                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == self.capstone.CS_OP_IMM:
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    nxt_jmp = unsigned(op.value.imm)

                    # A successor is "dead" for this graph when it is
                    # another function or data.
                    is_d1 = direct_nxt in self.functions or \
                        self.db.mem.is_data(direct_nxt)
                    is_d2 = nxt_jmp in self.functions or \
                        self.db.mem.is_data(nxt_jmp)

                    if is_d1 and is_d2:
                        gph.new_node(inst, prefetch, None)
                    elif not is_d1 and not is_d2:
                        # Both successors are followed only when *neither*
                        # is dead. `not (is_d1 and is_d2)` would also catch
                        # the two single-successor cases below.
                        stack.append(direct_nxt)
                        stack.append(nxt_jmp)
                        gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                    elif is_d1:
                        stack.append(nxt_jmp)
                        gph.new_node(inst, prefetch, [nxt_jmp])
                    else:
                        # Only is_d2 is left.
                        stack.append(direct_nxt)
                        gph.new_node(inst, prefetch, [direct_nxt])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            else:
                if ad != entry and ARCH_UTILS.is_call(inst):
                    # TODO: like in the analyzer, simulate registers
                    # -> during the analysis, save in the database
                    # the immediate value.
                    op = inst.operands[0]
                    if op.type == self.capstone.CS_OP_IMM:
                        imm = unsigned(op.value.imm)
                        if imm in self.functions and self.is_noreturn(imm):
                            prefetch = self.__add_prefetch(addresses, inst)
                            gph.new_node(inst, prefetch, None)
                            gph.exit_or_ret.add(ad)
                            continue

                    if op.type == self.capstone.CS_OP_MEM:
                        if ad in self.db.immediates and \
                                self.binary.imports[self.db.immediates[ad]] & FUNC_FLAG_NORETURN:
                            prefetch = self.__add_prefetch(addresses, inst)
                            gph.new_node(inst, prefetch, None)
                            gph.exit_or_ret.add(ad)
                            continue

                nxt = inst.address + inst.size

                if nxt in self.functions or self.db.mem.is_data(nxt):
                    gph.new_node(inst, None, None)
                else:
                    stack.append(nxt)
                    gph.new_node(inst, None, [nxt])

        if len(gph.nodes) == 0:
            return None, 0

        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.reverse_stripped_list(self, addresses)
        else:
            nb_new_syms = 0

        elapsed = time() - start
        debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

        return gph, nb_new_syms