import capstone
import binwalk.core.common
import binwalk.core.compat
from binwalk.core.module import Module, Option, Kwarg


class ArchResult(object):
    """Simple attribute bag describing a candidate match found while scanning.

    Every keyword argument passed to the constructor becomes an instance
    attribute. scan_file() creates instances with the attributes ``offset``,
    ``description``, ``insns`` and ``count``.
    """

    def __init__(self, **kwargs):
        for (k, v) in binwalk.core.compat.iterator(kwargs):
            setattr(self, k, v)


class Architecture(object):
    """Simple attribute bag describing one disassembler configuration.

    Every keyword argument passed to the constructor becomes an instance
    attribute. The entries in Disasm.ARCHITECTURES use the attributes
    ``type``, ``mode``, ``endianess`` and ``description``.
    """

    def __init__(self, **kwargs):
        for (k, v) in binwalk.core.compat.iterator(kwargs):
            setattr(self, k, v)


class Disasm(Module):
    """Identify the CPU architecture of a file by trial disassembly.

    Each file is walked one byte at a time; at every offset a small window of
    data is handed to one capstone disassembler per ARCHITECTURES entry. An
    architecture is reported once it yields at least ``min_insn_count``
    instructions from a window.
    """

    # Number of successful windows for the same architecture after which the
    # scan of the current data block stops early.
    THRESHOLD = 10
    DEFAULT_MIN_INSN_COUNT = 500

    TITLE = "Disassembly Scan"
    ORDER = 10

    CLI = [
        Option(short='Y',
               long='disasm',
               kwargs={'enabled': True},
               description='Identify the CPU architecture of a file using the capstone disassembler'),
        Option(short='T',
               long='minsn',
               type=int,
               kwargs={'min_insn_count': 0},
               description='Minimum number of consecutive instructions to be considered valid (default: %d)' % DEFAULT_MIN_INSN_COUNT),
        Option(long='continue',
               short='k',
               kwargs={'keep_going': True},
               description="Don't stop at the first match"),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='keep_going', default=False),
        Kwarg(name='min_insn_count', default=DEFAULT_MIN_INSN_COUNT),
    ]

    # One entry per disassembler configuration to try. ``mode`` and
    # ``endianess`` are capstone CS_MODE_* flags that init() combines into the
    # single mode value passed to capstone.Cs().
    ARCHITECTURES = [
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 32-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 32-bit, little endian"),
        Architecture(type=capstone.CS_ARCH_ARM64,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 64-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_ARM64,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 64-bit, little endian"),

        Architecture(type=capstone.CS_ARCH_PPC,
                     mode=capstone.CS_MODE_BIG_ENDIAN,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="PPC executable code, 32/64-bit, big endian"),

        Architecture(type=capstone.CS_ARCH_MIPS,
                     mode=capstone.CS_MODE_64,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="MIPS executable code, 32/64-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_MIPS,
                     mode=capstone.CS_MODE_64,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="MIPS executable code, 32/64-bit, little endian"),

        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_THUMB,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 16-bit (Thumb), little endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_THUMB,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 16-bit (Thumb), big endian"),
    ]

    def init(self):
        """Build one capstone disassembler per ARCHITECTURES entry.

        Populates ``self.disassemblers`` with ``(capstone.Cs, description)``
        tuples and derives ``self.disasm_data_size``, the number of bytes
        handed to the disassemblers per attempt.
        """
        self.disassemblers = []

        # A falsy value (e.g. the 0 listed in the CLI kwargs) means "use the default".
        if not self.min_insn_count:
            self.min_insn_count = self.DEFAULT_MIN_INSN_COUNT

        self.disasm_data_size = self.min_insn_count * 10

        for arch in self.ARCHITECTURES:
            # The mode and endianess values are bit flags and must be OR'd
            # together. Adding them is only equivalent while they share no
            # bits; the PPC entry uses CS_MODE_BIG_ENDIAN for both fields, so
            # addition would double the flag instead of setting it.
            mode = arch.mode | arch.endianess
            self.disassemblers.append((capstone.Cs(arch.type, mode), arch.description))

    def scan_file(self, fp):
        """Scan one file for executable code and report the best match per block.

        Reads the file block by block via ``fp.read_block()``. Within a block,
        a window of ``self.disasm_data_size`` bytes is disassembled at every
        byte offset with every configured disassembler. Returns early after the
        first displayed result unless ``self.keep_going`` is set.

        fp - The file object to scan; must provide ``read_block()`` and ``offset``.

        Returns None.
        """
        total_read = 0

        while True:
            result = None

            (data, dlen) = fp.read_block()
            if not data:
                break

            # If this data block doesn't contain at least two different bytes, skip it
            # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
            if len(set(data)) >= 2:
                block_offset = 0

                # Loop through the entire block, or until we're pretty sure we've found some valid code in this block
                while (block_offset < dlen) and (result is None or result.count < self.THRESHOLD):
                    # Don't pass the entire data block into disasm_lite, it's horribly inefficient
                    # to pass large strings around in Python. Break it up into smaller code blocks instead.
                    code_block = binwalk.core.compat.str2bytes(data[block_offset:block_offset+self.disasm_data_size])

                    # If this code block doesn't contain at least two different bytes, skip it
                    # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
                    if len(set(code_block)) >= 2:
                        for (md, description) in self.disassemblers:
                            insns = list(md.disasm_lite(code_block, (total_read+block_offset)))
                            binwalk.core.common.debug("0x%.8X %s, at least %d valid instructions" % ((total_read+block_offset),
                                                                                                    description,
                                                                                                    len(insns)))

                            # Did we disassemble at least self.min_insn_count instructions?
                            if len(insns) >= self.min_insn_count:
                                # If we've already found the same type of code in this block, simply update the result counter
                                if result and result.description == description:
                                    result.count += 1
                                    if result.count >= self.THRESHOLD:
                                        break
                                else:
                                    # First hit, or a different architecture than the
                                    # current candidate: start counting afresh.
                                    result = ArchResult(offset=total_read+block_offset+fp.offset,
                                                        description=description,
                                                        insns=insns,
                                                        count=1)

                    block_offset += 1
                    self.status.completed += 1

                if result is not None:
                    r = self.result(offset=result.offset,
                                    file=fp,
                                    description=(result.description + ", at least %d valid instructions" % len(result.insns)))

                    if r.valid and r.display:
                        # In verbose mode, also emit one result per disassembled instruction.
                        if self.config.verbose:
                            for (position, size, mnem, opnds) in result.insns:
                                self.result(offset=position, file=fp, description="%s %s" % (mnem, opnds))
                        if not self.keep_going:
                            return

            total_read += dlen
            # Replace the per-byte progress increments with the exact byte count read so far.
            self.status.completed = total_read

    def run(self):
        """Scan every file yielded by ``self.next_file``, wrapping each in a header and footer."""
        for fp in iter(self.next_file, None):
            self.header()
            self.scan_file(fp)
            self.footer()