1#!~/.wine/drive_c/Python25/python.exe
2# -*- coding: utf-8 -*-
3
4# Copyright (c) 2009-2014, Mario Vilas
5# All rights reserved.
6#
7# Redistribution and use in source and binary forms, with or without
8# modification, are permitted provided that the following conditions are met:
9#
10#     * Redistributions of source code must retain the above copyright notice,
11#       this list of conditions and the following disclaimer.
12#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
14#       documentation and/or other materials provided with the distribution.
15#     * Neither the name of the copyright holder nor the names of its
16#       contributors may be used to endorse or promote products derived from
17#       this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29# POSSIBILITY OF SUCH DAMAGE.
30
31"""
32Binary code disassembly.
33
34@group Disassembler loader:
35    Disassembler, Engine
36
37@group Disassembler engines:
38    BeaEngine, CapstoneEngine, DistormEngine,
39    LibdisassembleEngine, PyDasmEngine
40"""
41
42from __future__ import with_statement
43
# Revision identifier of this file (presumably a version control keyword
# placeholder - TODO confirm).
__revision__ = "$Id$"

# Public names exported by this module: the generic disassembler factory,
# the base class for engine adaptors, and one adaptor class per engine.
__all__ = [
    'Disassembler',
    'Engine',
    'BeaEngine',
    'CapstoneEngine',
    'DistormEngine',
    'LibdisassembleEngine',
    'PyDasmEngine',
]
55
56from winappdbg.textio import HexDump
57from winappdbg import win32
58
59import ctypes
60import warnings
61
# Lazy imports.
# Each name below is a placeholder for an optional third party module.
# It stays None until the matching engine's _import_dependencies() method
# runs, which rebinds the global name to the imported module.
BeaEnginePython = None
distorm3 = None
pydasm = None
libdisassemble = None
capstone = None
68
69#==============================================================================
70
class Engine (object):
    """
    Common ancestor of all the disassembler engine adaptors.

    Subclasses describe themselves through the class variables below and
    implement L{_import_dependencies} and L{decode}.

    @type name: str
    @cvar name: Engine name to use with the L{Disassembler} class.

    @type desc: str
    @cvar desc: User friendly name of the disassembler engine.

    @type url: str
    @cvar url: Download URL.

    @type supported: set(str)
    @cvar supported: Set of supported processor architectures.
        For more details see L{win32.version._get_arch}.

    @type arch: str
    @ivar arch: Name of the processor architecture.
    """

    name = "<insert engine name here>"
    desc = "<insert engine description here>"
    url  = "<insert download url here>"
    supported = set()

    def __init__(self, arch = None):
        """
        Validate the requested architecture, then load the dependencies.

        @type  arch: str
        @param arch: Name of the processor architecture.
            If not provided the current processor architecture is assumed.
            For more details see L{win32.version._get_arch}.

        @raise NotImplementedError: This disassembler doesn't support the
            requested processor architecture, or its dependencies could not
            be imported.
        """
        self.arch = self._validate_arch(arch)
        try:
            self._import_dependencies()
        except ImportError:
            # Report a missing library the same way as an unsupported
            # architecture, pointing the user to the download page.
            raise NotImplementedError(
                "%s is not installed or can't be found. Download it from: %s"
                % (self.name, self.url))

    def _validate_arch(self, arch = None):
        """
        Check the architecture against the ones this engine supports.

        @type  arch: str
        @param arch: Name of the processor architecture.
            If not provided the current processor architecture is assumed.
            For more details see L{win32.version._get_arch}.

        @rtype:  str
        @return: Name of the processor architecture that will be used.
            This is the value of C{win32.arch} when no name was provided.

        @raise NotImplementedError: This disassembler doesn't support the
            requested processor architecture.
        """

        # Fall back to the default architecture when none was requested.
        requested = arch or win32.arch

        # Accept the architecture only if this engine can handle it.
        if requested in self.supported:
            return requested

        raise NotImplementedError(
            "The %s engine cannot decode %s code." % (self.name, requested))

    def _import_dependencies(self):
        """
        Loads the dependencies for this disassembler.

        The base class has nothing to load, it only complains that the
        subclass forgot to override this method.

        @raise ImportError: This disassembler cannot find or load the
            necessary dependencies to make it work.
        """
        raise SyntaxError("Subclasses MUST implement this method!")

    def decode(self, address, code):
        """
        Disassemble a buffer of machine code. Subclasses must override it.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.

        @raise NotImplementedError: This disassembler could not be loaded.
            This may be due to missing dependencies.
        """
        raise NotImplementedError()
173
174#==============================================================================
175
class BeaEngine (Engine):
    """
    Integration with the BeaEngine disassembler by Beatrix.

    @see: U{https://sourceforge.net/projects/winappdbg/files/additional%20packages/BeaEngine/}
    """

    name = "BeaEngine"
    desc = "BeaEngine disassembler by Beatrix"
    url  = "https://sourceforge.net/projects/winappdbg/files/additional%20packages/BeaEngine/"

    supported = set((
        win32.ARCH_I386,
        win32.ARCH_AMD64,
    ))

    def _import_dependencies(self):
        """
        Loads the BeaEngine ctypes wrapper on first use.

        @raise ImportError: The C{BeaEnginePython} module can't be imported.
        """

        # Load the BeaEngine ctypes wrapper.
        global BeaEnginePython
        if BeaEnginePython is None:
            import BeaEnginePython

    def decode(self, address, code):
        """
        Disassemble a buffer of machine code using BeaEngine.

        Bytes that can't be decoded, and trailing bytes of an instruction
        that runs past the end of the buffer, are returned as one byte long
        C{db} pseudo instructions.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.
        """
        addressof = ctypes.addressof
        size = len(code)

        # Instance the code buffer.
        # Beware: ctypes appends a NUL terminator, so the buffer is one byte
        # longer than the code. Always use "size" as the upper bound.
        code_buffer = ctypes.create_string_buffer(code)
        buffer_ptr = addressof(code_buffer)

        # Instance the disassembler structure.
        Instruction = BeaEnginePython.DISASM()
        Instruction.VirtualAddr = address
        Instruction.EIP = buffer_ptr
        Instruction.SecurityBlock = buffer_ptr + size
        if self.arch == win32.ARCH_I386:
            Instruction.Archi = 0
        else:
            Instruction.Archi = 0x40
        Instruction.Options = ( BeaEnginePython.Tabulation      +
                                BeaEnginePython.NasmSyntax      +
                                BeaEnginePython.SuffixedNumeral +
                                BeaEnginePython.ShowSegmentRegs )

        # Prepare for looping over each instruction.
        result = []
        Disasm = BeaEnginePython.Disasm
        InstructionPtr = addressof(Instruction)
        hexdump = HexDump.hexadecimal
        append = result.append
        OUT_OF_BLOCK   = BeaEnginePython.OUT_OF_BLOCK
        UNKNOWN_OPCODE = BeaEnginePython.UNKNOWN_OPCODE

        # For each decoded instruction...
        while True:

            # Calculate the current offset into the buffer.
            offset = Instruction.EIP - buffer_ptr

            # If we've gone past the buffer, break the loop.
            if offset >= size:
                break

            # Decode the current instruction.
            InstrLength = Disasm(InstructionPtr)

            # If BeaEngine detects we've gone past the buffer, break the loop.
            if InstrLength == OUT_OF_BLOCK:
                break

            # The instruction could not be decoded.
            if InstrLength == UNKNOWN_OPCODE:

                # Output a single byte as a "db" instruction.
                char = "%.2X" % ord(code_buffer[offset])
                append((
                    Instruction.VirtualAddr,
                    1,
                    "db %sh" % char,
                    char,
                ))
                Instruction.VirtualAddr += 1
                Instruction.EIP += 1

            # The instruction was decoded but reading past the buffer's end.
            # This can happen when the last instruction is a prefix without an
            # opcode. For example: decode(0, '\x66')
            elif offset + InstrLength > size:

                # Output each remaining byte as a "db" instruction.
                # The slice stops at the end of the code, otherwise the NUL
                # terminator of the ctypes buffer would be reported as an
                # extra instruction that was never part of the input.
                for char in code_buffer.raw[offset:size]:
                    char = "%.2X" % ord(char)
                    append((
                        Instruction.VirtualAddr,
                        1,
                        "db %sh" % char,
                        char,
                    ))
                    Instruction.VirtualAddr += 1
                    Instruction.EIP += 1

            # The instruction was decoded correctly.
            else:

                # Output the decoded instruction.
                append((
                    Instruction.VirtualAddr,
                    InstrLength,
                    Instruction.CompleteInstr.strip(),
                    hexdump(code_buffer.raw[offset:offset+InstrLength]),
                ))
                Instruction.VirtualAddr += InstrLength
                Instruction.EIP += InstrLength

        # Return the list of decoded instructions.
        return result
292
293#==============================================================================
294
class DistormEngine (Engine):
    """
    Adaptor for the diStorm disassembler by Gil Dabah.

    @see: U{https://code.google.com/p/distorm3}
    """

    name = "diStorm"
    desc = "diStorm disassembler by Gil Dabah"
    url  = "https://code.google.com/p/distorm3"

    supported = set([win32.ARCH_I386, win32.ARCH_AMD64])

    def _import_dependencies(self):
        """
        Loads the diStorm bindings and caches what L{decode} needs.

        @raise ImportError: Neither the C{distorm3} nor the C{distorm}
            module can be imported.
        """

        # Import the bindings only once per process.
        # If "distorm3" is not available fall back to the "distorm" module.
        global distorm3
        if distorm3 is None:
            try:
                import distorm3
            except ImportError:
                import distorm as distorm3

        # Remember the decoder function.
        self.__decode = distorm3.Decode

        # Remember the bits flag that matches the selected architecture.
        flag_by_arch = {
            win32.ARCH_I386:  distorm3.Decode32Bits,
            win32.ARCH_AMD64: distorm3.Decode64Bits,
        }
        self.__flag = flag_by_arch[self.arch]

    def decode(self, address, code):
        """
        Disassemble a buffer of machine code using diStorm.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @return: Whatever the diStorm decoder function returns for the
            given address, code and bits flag.
        """
        decoder = self.__decode
        return decoder(address, code, self.__flag)
332
333#==============================================================================
334
class PyDasmEngine (Engine):
    """
    Adaptor for PyDasm, the Python bindings to libdasm.

    @see: U{https://code.google.com/p/libdasm/}
    """

    name = "PyDasm"
    desc = "PyDasm: Python bindings to libdasm"
    url  = "https://code.google.com/p/libdasm/"

    supported = set([win32.ARCH_I386])

    def _import_dependencies(self):
        """
        Loads the libdasm bindings on first use.

        @raise ImportError: The C{pydasm} module can't be imported.
        """
        global pydasm
        if pydasm is None:
            import pydasm

    def decode(self, address, code):
        """
        Disassemble a buffer of 32 bits machine code using PyDasm.

        Bytes that can't be decoded, or that begin an instruction longer
        than the remaining buffer, are returned as one byte long C{db}
        pseudo instructions.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.
        """
        decoded = []
        position = 0
        while position < len(code):

            # Hand the decoder a window of at most 32 bytes.
            insn = pydasm.get_instruction(code[position:position+32],
                                          pydasm.MODE_32)

            # Memory address of the instruction being decoded.
            where = address + position

            # The instruction was decoded and fits in the buffer.
            if insn and insn.length + position <= len(code):
                text = pydasm.get_instruction_string(insn,
                                                     pydasm.FORMAT_INTEL,
                                                     where)
                size = insn.length
                dump = HexDump.hexadecimal(code[position:position+size])

            # Illegal opcode or opcode longer than remaining buffer.
            # Emit the current byte as data and resume at the next one.
            else:
                dump = '%.2X' % ord(code[position])
                text = 'db 0x%s' % dump
                size = 1

            decoded.append((where, size, text, dump))

            # Move to the next instruction.
            position += size

        return decoded
398
399#==============================================================================
400
class LibdisassembleEngine (Engine):
    """
    Adaptor for Immunity libdisassemble.

    @see: U{http://www.immunitysec.com/resources-freesoftware.shtml}
    """

    name = "Libdisassemble"
    desc = "Immunity libdisassemble"
    url  = "http://www.immunitysec.com/resources-freesoftware.shtml"

    supported = set([win32.ARCH_I386])

    def _import_dependencies(self):
        """
        Loads the libdisassemble module on first use.

        The library has no installer and no __init__.py file, so users
        install it manually in whatever layout they like. Two layouts are
        tried: a proper package, then loose files on the import path.

        @raise ImportError: The module can't be found in either layout.
        """
        global libdisassemble
        if libdisassemble is None:
            try:
                # Installed as a package (someone added an __init__.py).
                import libdisassemble.disassemble as libdisassemble
            except ImportError:
                # Installed by copying the files somewhere in the path.
                import disassemble as libdisassemble

    def decode(self, address, code):
        """
        Disassemble a buffer of machine code using libdisassemble.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.
        """
        decoded = []
        position = 0
        while position < len(code):

            # Decode one instruction from a window of at most 32 bytes.
            insn = libdisassemble.Opcode( code[position:position+32] )
            size = insn.getSize()
            text = insn.printOpcode('INTEL')
            dump = HexDump.hexadecimal( code[position:position+size] )

            decoded.append((address + position, size, text, dump))

            # Move to the next instruction.
            position += size

        return decoded
461
462#==============================================================================
463
class CapstoneEngine (Engine):
    """
    Integration with the Capstone disassembler by Nguyen Anh Quynh.

    @see: U{http://www.capstone-engine.org/}
    """

    name = "Capstone"
    desc = "Capstone disassembler by Nguyen Anh Quynh"
    url  = "http://www.capstone-engine.org/"

    supported = set((
        win32.ARCH_I386,
        win32.ARCH_AMD64,
        win32.ARCH_THUMB,
        win32.ARCH_ARM,
        win32.ARCH_ARM64,
    ))

    def _import_dependencies(self):
        """
        Loads the Capstone bindings and checks for a known binding bug.

        A C{RuntimeWarning} is issued when the installed bindings don't
        return C{CsInsn} objects, since that version is unstable.

        @raise ImportError: The C{capstone} module can't be imported.
        """

        # Load the Capstone bindings.
        global capstone
        if capstone is None:
            import capstone

        # Load the constants for the requested architecture.
        self.__constants = {
            win32.ARCH_I386:
                (capstone.CS_ARCH_X86,   capstone.CS_MODE_32),
            win32.ARCH_AMD64:
                (capstone.CS_ARCH_X86,   capstone.CS_MODE_64),
            win32.ARCH_THUMB:
                (capstone.CS_ARCH_ARM,   capstone.CS_MODE_THUMB),
            win32.ARCH_ARM:
                (capstone.CS_ARCH_ARM,   capstone.CS_MODE_ARM),
            win32.ARCH_ARM64:
                (capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM),
        }

        # Test for the bug in early versions of Capstone.
        # If found, warn the user about it.
        # The result is converted to a list because some versions of the
        # bindings return a generator, which can't be indexed.
        try:
            probe = list(capstone.cs_disasm_quick(
                capstone.CS_ARCH_X86, capstone.CS_MODE_32, "\x90", 1))
            self.__bug = not isinstance(probe[0], capstone.capstone.CsInsn)
        except AttributeError:
            self.__bug = False
        if self.__bug:
            warnings.warn(
                "This version of the Capstone bindings is unstable,"
                " please upgrade to a newer one!",
                RuntimeWarning, stacklevel=4)


    def decode(self, address, code):
        """
        Disassemble a buffer of machine code using Capstone.

        Bytes that can't be decoded are returned as "define constant"
        pseudo instructions: C{db} with one byte on Intel processors,
        C{dcb} with up to four bytes on the rest of the architectures.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.
        """

        # Get the constants for the requested architecture.
        arch, mode = self.__constants[self.arch]

        # Get the decoder function outside the loop.
        decoder = capstone.cs_disasm_quick

        # If the buggy version of the bindings are being used, we need to catch
        # all exceptions broadly. If not, we only need to catch CsError.
        if self.__bug:
            CsError = Exception
        else:
            CsError = capstone.CsError

        # Create the variables for the instruction length, mnemonic and
        # operands. That way they won't be created within the loop,
        # minimizing the chances data might be overwritten.
        # (This only makes sense for the buggy version of the bindings,
        # normally memory accesses are safe).
        length = mnemonic = op_str = None

        # The "define constant" mnemonic and the number of bytes to skip on
        # error depend only on the architecture, so get them outside the loop.
        # On Intel processors we'll skip one byte, since we can't really know
        # the instruction length, and the mnemonic is "db". On the rest of
        # the architectures we skip four bytes and the mnemonic is "dcb".
        if self.arch in (win32.ARCH_I386, win32.ARCH_AMD64):
            skip_size     = 1
            skip_mnemonic = "db "
        else:
            skip_size     = 4
            skip_mnemonic = "dcb "

        # For each instruction...
        result = []
        offset = 0
        while offset < len(code):

            # Disassemble a single instruction, because disassembling multiple
            # instructions may cause excessive memory usage (Capstone allocates
            # approximately 1K of metadata per each decoded instruction).
            # The result is converted to a list before indexing it, because
            # some versions of the bindings return a generator instead.
            instr = None
            try:
                instr = list(decoder(
                    arch, mode, code[offset:offset+16], address+offset, 1))[0]
            except IndexError:
                pass   # No instructions decoded.
            except CsError:
                pass   # Any other error.

            # On success add the decoded instruction.
            if instr is not None:

                # Get the instruction length, mnemonic and operands.
                # Copy the values quickly before someone overwrites them,
                # if using the buggy version of the bindings (otherwise it's
                # irrelevant in which order we access the properties).
                length   = instr.size
                mnemonic = instr.mnemonic
                op_str   = instr.op_str

                # Concatenate the mnemonic and the operands.
                if op_str:
                    disasm = "%s %s" % (mnemonic, op_str)
                else:
                    disasm = mnemonic

                # Get the instruction bytes as a hexadecimal dump.
                hexdump = HexDump.hexadecimal( code[offset:offset+length] )

            # On error add a "define constant" instruction.
            # The exact instruction depends on the architecture.
            else:

                # Get the skipped bytes as a hexadecimal dump.
                # Near the end of the buffer there may be fewer bytes left
                # than we meant to skip, so the reported length is that of
                # the bytes we actually got. It's never zero, since the
                # offset is always within the buffer at this point.
                skipped = code[offset:offset+skip_size]
                length  = len(skipped)
                hexdump = HexDump.hexadecimal(skipped)

                # Build the "define constant" instruction.
                # Letters are shown as characters, the rest as hexadecimal.
                mnemonic = skip_mnemonic
                operands = []
                for b in skipped:
                    if b.isalpha():
                        operands.append("'%s'" % b)
                    else:
                        operands.append("0x%x" % ord(b))
                op_str = ", ".join(operands)
                disasm = mnemonic + op_str

            # Add the decoded instruction to the list.
            result.append((
                address + offset,
                length,
                disasm,
                hexdump,
            ))

            # Update the offset.
            offset += length

        # Return the list of decoded instructions.
        return result
625
626#==============================================================================
627
628# TODO: use a lock to access __decoder
629# TODO: look in sys.modules for whichever disassembler is already loaded
630
class Disassembler (object):
    """
    Generic disassembler. Uses a set of adapters to decide which library to
    load for which supported platform.

    @type engines: tuple( L{Engine} )
    @cvar engines: Set of supported engines. If you implement your own adapter
        you can add its class here to make it available to L{Disassembler}.
        Supported disassemblers are:
    """

    engines = (
        DistormEngine,  # diStorm engine goes first for backwards compatibility
        BeaEngine,
        CapstoneEngine,
        LibdisassembleEngine,
        PyDasmEngine,
    )

    # Add the list of supported disassemblers to the docstring.
    # There is no docstring to extend when Python runs with the -OO switch,
    # in that case __doc__ is None and must be left alone.
    if __doc__ is not None:
        __doc__ += "\n"
        for e in engines:
            __doc__ += "         - %s - %s (U{%s})\n" % (e.name, e.desc, e.url)
        del e

    # Cache of already loaded disassemblers.
    # The keys are tuples of (engine class name, architecture name).
    __decoder = {}

    def __new__(cls, arch = None, engine = None):
        """
        Factory class. You can't really instance a L{Disassembler} object,
        instead one of the adapter L{Engine} subclasses is returned.

        Engine instances are cached, so requesting the same engine for the
        same architecture again returns the same object.

        @type  arch: str
        @param arch: (Optional) Name of the processor architecture.
            If not provided the current processor architecture is assumed.
            For more details see L{win32.version._get_arch}.

        @type  engine: str
        @param engine: (Optional) Name of the disassembler engine.
            If not provided a compatible one is loaded automatically.
            The name is not case sensitive.
            See: L{Engine.name}

        @rtype:  L{Engine}
        @return: Instance of the selected engine adapter.

        @raise NotImplementedError: No compatible disassembler was found that
            could decode machine code for the requested architecture. This may
            be due to missing dependencies.

        @raise ValueError: An unknown engine name was supplied.
        """

        # Use the default architecture if none specified.
        if not arch:
            arch = win32.arch

        # Return a compatible engine if none specified.
        if not engine:
            for clazz in cls.engines:
                if arch not in clazz.supported:
                    continue
                selected = (clazz.name, arch)
                decoder = cls.__decoder.get(selected)
                if decoder is None:
                    try:
                        decoder = clazz(arch)
                    except NotImplementedError:
                        # This engine can't be loaded, try the next one.
                        continue
                    cls.__decoder[selected] = decoder
                return decoder
            msg = "No disassembler engine available for %s code." % arch
            raise NotImplementedError(msg)

        # Find the specified engine. The name is not case sensitive.
        engineLower = engine.lower()
        for clazz in cls.engines:
            if clazz.name.lower() == engineLower:
                break
        else:
            msg = "Unsupported disassembler engine: %s" % engine
            raise ValueError(msg)

        # Make sure the engine supports the architecture.
        if arch not in clazz.supported:
            msg = "The %s engine cannot decode %s code." % (engine, arch)
            raise NotImplementedError(msg)

        # Return the specified engine.
        # The cache key uses the canonical engine name rather than the name
        # as typed by the user, so every spelling of the name (and the
        # automatic selection above) shares one instance per architecture.
        selected = (clazz.name, arch)
        try:
            decoder = cls.__decoder[selected]
        except KeyError:
            decoder = clazz(arch)
            cls.__decoder[selected] = decoder
        return decoder
723