1import capstone
2import binwalk.core.common
3import binwalk.core.compat
4from binwalk.core.module import Module, Option, Kwarg
5
class ArchResult(object):
    """Simple attribute container for one architecture match.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name.
    """

    def __init__(self, **kwargs):
        # Copy each keyword argument onto the instance as an attribute.
        for (name, value) in binwalk.core.compat.iterator(kwargs):
            setattr(self, name, value)
10
class Architecture(object):
    """Simple attribute container describing one architecture to try.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name.
    """

    def __init__(self, **kwargs):
        # Copy each keyword argument onto the instance as an attribute.
        for (name, value) in binwalk.core.compat.iterator(kwargs):
            setattr(self, name, value)
15
class Disasm(Module):
    """Module that guesses the CPU architecture of a file using capstone.

    Each data block read from a file is disassembled, at successive byte
    offsets, with one capstone disassembler per entry in ARCHITECTURES. A
    disassembler that decodes at least ``min_insn_count`` instructions from
    a code block counts as a match, and the last match found in a data block
    is reported through ``self.result``.
    """

    # Number of matches for the same architecture within one data block after
    # which the scan of that block stops early.
    THRESHOLD = 10
    # Instruction count used when no (non-zero) minimum has been configured.
    DEFAULT_MIN_INSN_COUNT = 500

    TITLE = "Disassembly Scan"
    ORDER = 10

    CLI = [
            Option(short='Y',
                   long='disasm',
                   kwargs={'enabled' : True},
                   description='Identify the CPU architecture of a file using the capstone disassembler'),
            Option(short='T',
                   long='minsn',
                   type=int,
                   kwargs={'min_insn_count' : 0},
                   description='Minimum number of consecutive instructions to be considered valid (default: %d)' % DEFAULT_MIN_INSN_COUNT),
            Option(long='continue',
                   short='k',
                   kwargs={'keep_going' : True},
                   description="Don't stop at the first match"),
          ]

    KWARGS = [
                Kwarg(name='enabled', default=False),
                Kwarg(name='keep_going', default=False),
                Kwarg(name='min_insn_count', default=DEFAULT_MIN_INSN_COUNT),
             ]

    # Architectures to try, in order. 'mode' and 'endianess' are both capstone
    # CS_MODE_* flags; init() combines them into the single mode value that is
    # passed to capstone.Cs.
    ARCHITECTURES = [
                    Architecture(type=capstone.CS_ARCH_ARM,
                                 mode=capstone.CS_MODE_ARM,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="ARM executable code, 32-bit, big endian"),
                    Architecture(type=capstone.CS_ARCH_ARM,
                                 mode=capstone.CS_MODE_ARM,
                                 endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                                 description="ARM executable code, 32-bit, little endian"),
                    Architecture(type=capstone.CS_ARCH_ARM64,
                                 mode=capstone.CS_MODE_ARM,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="ARM executable code, 64-bit, big endian"),
                    Architecture(type=capstone.CS_ARCH_ARM64,
                                 mode=capstone.CS_MODE_ARM,
                                 endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                                 description="ARM executable code, 64-bit, little endian"),

                    Architecture(type=capstone.CS_ARCH_PPC,
                                 mode=capstone.CS_MODE_BIG_ENDIAN,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="PPC executable code, 32/64-bit, big endian"),

                    Architecture(type=capstone.CS_ARCH_MIPS,
                                 mode=capstone.CS_MODE_64,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="MIPS executable code, 32/64-bit, big endian"),
                    Architecture(type=capstone.CS_ARCH_MIPS,
                                 mode=capstone.CS_MODE_64,
                                 endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                                 description="MIPS executable code, 32/64-bit, little endian"),

                    Architecture(type=capstone.CS_ARCH_ARM,
                                 mode=capstone.CS_MODE_THUMB,
                                 endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                                 description="ARM executable code, 16-bit (Thumb), little endian"),
                    Architecture(type=capstone.CS_ARCH_ARM,
                                 mode=capstone.CS_MODE_THUMB,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="ARM executable code, 16-bit (Thumb), big endian"),
                    ]

    def init(self):
        """Build one (disassembler, description) pair per ARCHITECTURES entry."""
        self.disassemblers = []

        # A falsy count (such as the 0 carried by the --minsn option's kwargs)
        # falls back to the default.
        if not self.min_insn_count:
            self.min_insn_count = self.DEFAULT_MIN_INSN_COUNT

        # Size, in bytes, of each slice of data handed to the disassembler.
        self.disasm_data_size = self.min_insn_count * 10

        for arch in self.ARCHITECTURES:
            # Mode and endianness are bit flags and must be OR-ed together.
            # Adding them yields an invalid mode whenever both fields carry
            # the same flag (the PPC entry sets CS_MODE_BIG_ENDIAN twice).
            mode = arch.mode | arch.endianess
            self.disassemblers.append((capstone.Cs(arch.type, mode), arch.description))

    def scan_file(self, fp):
        """Scan one file for executable code and report any match.

        The file is consumed block by block via ``fp.read_block()``. Within
        each block, every byte offset is tried as the start of a code block
        until the block is exhausted or the same architecture has matched
        THRESHOLD times. Unless ``keep_going`` is set, the method returns
        after the first result that is both valid and displayed.

        Args:
            fp: File object as yielded by ``self.next_file``; its
                ``read_block()`` method and ``offset`` attribute are used.
        """
        total_read = 0

        while True:
            result = None

            (data, dlen) = fp.read_block()
            if not data:
                break

            # If this data block doesn't contain at least two different bytes, skip it
            # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
            if len(set(data)) >= 2:
                block_offset = 0

                # Loop through the entire block, or until we're pretty sure we've found some valid code in this block
                while (block_offset < dlen) and (result is None or result.count < self.THRESHOLD):
                    # Offset of this code block relative to the first byte read from fp
                    base = total_read + block_offset

                    # Don't pass the entire data block into disasm_lite, it's horribly inefficient
                    # to pass large strings around in Python. Break it up into smaller code blocks instead.
                    code_block = binwalk.core.compat.str2bytes(data[block_offset:block_offset+self.disasm_data_size])

                    # If this code block doesn't contain at least two different bytes, skip it
                    # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
                    if len(set(code_block)) >= 2:
                        for (md, description) in self.disassemblers:
                            insns = list(md.disasm_lite(code_block, base))
                            binwalk.core.common.debug("0x%.8X   %s, at least %d valid instructions" % (base,
                                                                                                        description,
                                                                                                        len(insns)))

                            # Did we disassemble at least self.min_insn_count instructions?
                            if len(insns) >= self.min_insn_count:
                                # If we've already found the same type of code in this block, simply update the result counter
                                if result and result.description == description:
                                    result.count += 1
                                    if result.count >= self.THRESHOLD:
                                        # Only leaves the for loop; the while condition above then ends the block scan
                                        break
                                else:
                                    # First match, or a different architecture than the previous match: start over
                                    result = ArchResult(offset=base+fp.offset,
                                                        description=description,
                                                        insns=insns,
                                                        count=1)

                    block_offset += 1
                    self.status.completed += 1

                if result is not None:
                    r = self.result(offset=result.offset,
                                    file=fp,
                                    description=(result.description + ", at least %d valid instructions" % len(result.insns)))

                    if r.valid and r.display:
                        if self.config.verbose:
                            # NOTE(review): instruction positions come from disasm_lite and do not
                            # include fp.offset, unlike result.offset above -- confirm this is intended.
                            for (position, size, mnem, opnds) in result.insns:
                                self.result(offset=position, file=fp, description="%s %s" % (mnem, opnds))
                        if not self.keep_going:
                            return

            total_read += dlen
            # Replace the per-offset progress increments with the exact byte count
            self.status.completed = total_read

    def run(self):
        """Scan every file returned by ``self.next_file`` until it returns None."""
        for fp in iter(self.next_file, None):
            self.header()
            self.scan_file(fp)
            self.footer()
165
166