1#!~/.wine/drive_c/Python25/python.exe 2# -*- coding: utf-8 -*- 3 4# Copyright (c) 2009-2014, Mario Vilas 5# All rights reserved. 6# 7# Redistribution and use in source and binary forms, with or without 8# modification, are permitted provided that the following conditions are met: 9# 10# * Redistributions of source code must retain the above copyright notice, 11# this list of conditions and the following disclaimer. 12# * Redistributions in binary form must reproduce the above copyright 13# notice,this list of conditions and the following disclaimer in the 14# documentation and/or other materials provided with the distribution. 15# * Neither the name of the copyright holder nor the names of its 16# contributors may be used to endorse or promote products derived from 17# this software without specific prior written permission. 18# 19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29# POSSIBILITY OF SUCH DAMAGE. 30 31""" 32Binary code disassembly. 33 34@group Disassembler loader: 35 Disassembler, Engine 36 37@group Disassembler engines: 38 BeaEngine, CapstoneEngine, DistormEngine, 39 LibdisassembleEngine, PyDasmEngine 40""" 41 42from __future__ import with_statement 43 44__revision__ = "$Id$" 45 46__all__ = [ 47 'Disassembler', 48 'Engine', 49 'BeaEngine', 50 'CapstoneEngine', 51 'DistormEngine', 52 'LibdisassembleEngine', 53 'PyDasmEngine', 54] 55 56from winappdbg.textio import HexDump 57from winappdbg import win32 58 59import ctypes 60import warnings 61 62# lazy imports 63BeaEnginePython = None 64distorm3 = None 65pydasm = None 66libdisassemble = None 67capstone = None 68 69#============================================================================== 70 71class Engine (object): 72 """ 73 Base class for disassembly engine adaptors. 74 75 @type name: str 76 @cvar name: Engine name to use with the L{Disassembler} class. 77 78 @type desc: str 79 @cvar desc: User friendly name of the disassembler engine. 80 81 @type url: str 82 @cvar url: Download URL. 83 84 @type supported: set(str) 85 @cvar supported: Set of supported processor architectures. 86 For more details see L{win32.version._get_arch}. 87 88 @type arch: str 89 @ivar arch: Name of the processor architecture. 90 """ 91 92 name = "<insert engine name here>" 93 desc = "<insert engine description here>" 94 url = "<insert download url here>" 95 supported = set() 96 97 def __init__(self, arch = None): 98 """ 99 @type arch: str 100 @param arch: Name of the processor architecture. 101 If not provided the current processor architecture is assumed. 102 For more details see L{win32.version._get_arch}. 103 104 @raise NotImplementedError: This disassembler doesn't support the 105 requested processor architecture. 106 """ 107 self.arch = self._validate_arch(arch) 108 try: 109 self._import_dependencies() 110 except ImportError: 111 msg = "%s is not installed or can't be found. Download it from: %s" 112 msg = msg % (self.name, self.url) 113 raise NotImplementedError(msg) 114 115 def _validate_arch(self, arch = None): 116 """ 117 @type arch: str 118 @param arch: Name of the processor architecture. 119 If not provided the current processor architecture is assumed. 120 For more details see L{win32.version._get_arch}. 121 122 @rtype: str 123 @return: Name of the processor architecture. 124 If not provided the current processor architecture is assumed. 125 For more details see L{win32.version._get_arch}. 126 127 @raise NotImplementedError: This disassembler doesn't support the 128 requested processor architecture. 129 """ 130 131 # Use the default architecture if none specified. 132 if not arch: 133 arch = win32.arch 134 135 # Validate the architecture. 136 if arch not in self.supported: 137 msg = "The %s engine cannot decode %s code." 138 msg = msg % (self.name, arch) 139 raise NotImplementedError(msg) 140 141 # Return the architecture. 142 return arch 143 144 def _import_dependencies(self): 145 """ 146 Loads the dependencies for this disassembler. 147 148 @raise ImportError: This disassembler cannot find or load the 149 necessary dependencies to make it work. 150 """ 151 raise SyntaxError("Subclasses MUST implement this method!") 152 153 def decode(self, address, code): 154 """ 155 @type address: int 156 @param address: Memory address where the code was read from. 157 158 @type code: str 159 @param code: Machine code to disassemble. 160 161 @rtype: list of tuple( long, int, str, str ) 162 @return: List of tuples. Each tuple represents an assembly instruction 163 and contains: 164 - Memory address of instruction. 165 - Size of instruction in bytes. 166 - Disassembly line of instruction. 167 - Hexadecimal dump of instruction. 168 169 @raise NotImplementedError: This disassembler could not be loaded. 170 This may be due to missing dependencies. 171 """ 172 raise NotImplementedError() 173 174#============================================================================== 175 176class BeaEngine (Engine): 177 """ 178 Integration with the BeaEngine disassembler by Beatrix. 179 180 @see: U{https://sourceforge.net/projects/winappdbg/files/additional%20packages/BeaEngine/} 181 """ 182 183 name = "BeaEngine" 184 desc = "BeaEngine disassembler by Beatrix" 185 url = "https://sourceforge.net/projects/winappdbg/files/additional%20packages/BeaEngine/" 186 187 supported = set(( 188 win32.ARCH_I386, 189 win32.ARCH_AMD64, 190 )) 191 192 def _import_dependencies(self): 193 194 # Load the BeaEngine ctypes wrapper. 195 global BeaEnginePython 196 if BeaEnginePython is None: 197 import BeaEnginePython 198 199 def decode(self, address, code): 200 addressof = ctypes.addressof 201 202 # Instance the code buffer. 203 buffer = ctypes.create_string_buffer(code) 204 buffer_ptr = addressof(buffer) 205 206 # Instance the disassembler structure. 207 Instruction = BeaEnginePython.DISASM() 208 Instruction.VirtualAddr = address 209 Instruction.EIP = buffer_ptr 210 Instruction.SecurityBlock = buffer_ptr + len(code) 211 if self.arch == win32.ARCH_I386: 212 Instruction.Archi = 0 213 else: 214 Instruction.Archi = 0x40 215 Instruction.Options = ( BeaEnginePython.Tabulation + 216 BeaEnginePython.NasmSyntax + 217 BeaEnginePython.SuffixedNumeral + 218 BeaEnginePython.ShowSegmentRegs ) 219 220 # Prepare for looping over each instruction. 221 result = [] 222 Disasm = BeaEnginePython.Disasm 223 InstructionPtr = addressof(Instruction) 224 hexdump = HexDump.hexadecimal 225 append = result.append 226 OUT_OF_BLOCK = BeaEnginePython.OUT_OF_BLOCK 227 UNKNOWN_OPCODE = BeaEnginePython.UNKNOWN_OPCODE 228 229 # For each decoded instruction... 230 while True: 231 232 # Calculate the current offset into the buffer. 233 offset = Instruction.EIP - buffer_ptr 234 235 # If we've gone past the buffer, break the loop. 236 if offset >= len(code): 237 break 238 239 # Decode the current instruction. 240 InstrLength = Disasm(InstructionPtr) 241 242 # If BeaEngine detects we've gone past the buffer, break the loop. 243 if InstrLength == OUT_OF_BLOCK: 244 break 245 246 # The instruction could not be decoded. 247 if InstrLength == UNKNOWN_OPCODE: 248 249 # Output a single byte as a "db" instruction. 250 char = "%.2X" % ord(buffer[offset]) 251 result.append(( 252 Instruction.VirtualAddr, 253 1, 254 "db %sh" % char, 255 char, 256 )) 257 Instruction.VirtualAddr += 1 258 Instruction.EIP += 1 259 260 # The instruction was decoded but reading past the buffer's end. 261 # This can happen when the last instruction is a prefix without an 262 # opcode. For example: decode(0, '\x66') 263 elif offset + InstrLength > len(code): 264 265 # Output each byte as a "db" instruction. 266 for char in buffer[ offset : offset + len(code) ]: 267 char = "%.2X" % ord(char) 268 result.append(( 269 Instruction.VirtualAddr, 270 1, 271 "db %sh" % char, 272 char, 273 )) 274 Instruction.VirtualAddr += 1 275 Instruction.EIP += 1 276 277 # The instruction was decoded correctly. 278 else: 279 280 # Output the decoded instruction. 281 append(( 282 Instruction.VirtualAddr, 283 InstrLength, 284 Instruction.CompleteInstr.strip(), 285 hexdump(buffer.raw[offset:offset+InstrLength]), 286 )) 287 Instruction.VirtualAddr += InstrLength 288 Instruction.EIP += InstrLength 289 290 # Return the list of decoded instructions. 291 return result 292 293#============================================================================== 294 295class DistormEngine (Engine): 296 """ 297 Integration with the diStorm disassembler by Gil Dabah. 298 299 @see: U{https://code.google.com/p/distorm3} 300 """ 301 302 name = "diStorm" 303 desc = "diStorm disassembler by Gil Dabah" 304 url = "https://code.google.com/p/distorm3" 305 306 supported = set(( 307 win32.ARCH_I386, 308 win32.ARCH_AMD64, 309 )) 310 311 def _import_dependencies(self): 312 313 # Load the distorm bindings. 314 global distorm3 315 if distorm3 is None: 316 try: 317 import distorm3 318 except ImportError: 319 import distorm as distorm3 320 321 # Load the decoder function. 322 self.__decode = distorm3.Decode 323 324 # Load the bits flag. 325 self.__flag = { 326 win32.ARCH_I386: distorm3.Decode32Bits, 327 win32.ARCH_AMD64: distorm3.Decode64Bits, 328 }[self.arch] 329 330 def decode(self, address, code): 331 return self.__decode(address, code, self.__flag) 332 333#============================================================================== 334 335class PyDasmEngine (Engine): 336 """ 337 Integration with PyDasm: Python bindings to libdasm. 338 339 @see: U{https://code.google.com/p/libdasm/} 340 """ 341 342 name = "PyDasm" 343 desc = "PyDasm: Python bindings to libdasm" 344 url = "https://code.google.com/p/libdasm/" 345 346 supported = set(( 347 win32.ARCH_I386, 348 )) 349 350 def _import_dependencies(self): 351 352 # Load the libdasm bindings. 353 global pydasm 354 if pydasm is None: 355 import pydasm 356 357 def decode(self, address, code): 358 359 # Decode each instruction in the buffer. 360 result = [] 361 offset = 0 362 while offset < len(code): 363 364 # Try to decode the current instruction. 365 instruction = pydasm.get_instruction(code[offset:offset+32], 366 pydasm.MODE_32) 367 368 # Get the memory address of the current instruction. 369 current = address + offset 370 371 # Illegal opcode or opcode longer than remaining buffer. 372 if not instruction or instruction.length + offset > len(code): 373 hexdump = '%.2X' % ord(code[offset]) 374 disasm = 'db 0x%s' % hexdump 375 ilen = 1 376 377 # Correctly decoded instruction. 378 else: 379 disasm = pydasm.get_instruction_string(instruction, 380 pydasm.FORMAT_INTEL, 381 current) 382 ilen = instruction.length 383 hexdump = HexDump.hexadecimal(code[offset:offset+ilen]) 384 385 # Add the decoded instruction to the list. 386 result.append(( 387 current, 388 ilen, 389 disasm, 390 hexdump, 391 )) 392 393 # Move to the next instruction. 394 offset += ilen 395 396 # Return the list of decoded instructions. 397 return result 398 399#============================================================================== 400 401class LibdisassembleEngine (Engine): 402 """ 403 Integration with Immunity libdisassemble. 404 405 @see: U{http://www.immunitysec.com/resources-freesoftware.shtml} 406 """ 407 408 name = "Libdisassemble" 409 desc = "Immunity libdisassemble" 410 url = "http://www.immunitysec.com/resources-freesoftware.shtml" 411 412 supported = set(( 413 win32.ARCH_I386, 414 )) 415 416 def _import_dependencies(self): 417 418 # Load the libdisassemble module. 419 # Since it doesn't come with an installer or an __init__.py file 420 # users can only install it manually however they feel like it, 421 # so we'll have to do a bit of guessing to find it. 422 423 global libdisassemble 424 if libdisassemble is None: 425 try: 426 427 # If installed properly with __init__.py 428 import libdisassemble.disassemble as libdisassemble 429 430 except ImportError: 431 432 # If installed by just copying and pasting the files 433 import disassemble as libdisassemble 434 435 def decode(self, address, code): 436 437 # Decode each instruction in the buffer. 438 result = [] 439 offset = 0 440 while offset < len(code): 441 442 # Decode the current instruction. 443 opcode = libdisassemble.Opcode( code[offset:offset+32] ) 444 length = opcode.getSize() 445 disasm = opcode.printOpcode('INTEL') 446 hexdump = HexDump.hexadecimal( code[offset:offset+length] ) 447 448 # Add the decoded instruction to the list. 449 result.append(( 450 address + offset, 451 length, 452 disasm, 453 hexdump, 454 )) 455 456 # Move to the next instruction. 457 offset += length 458 459 # Return the list of decoded instructions. 460 return result 461 462#============================================================================== 463 464class CapstoneEngine (Engine): 465 """ 466 Integration with the Capstone disassembler by Nguyen Anh Quynh. 467 468 @see: U{http://www.capstone-engine.org/} 469 """ 470 471 name = "Capstone" 472 desc = "Capstone disassembler by Nguyen Anh Quynh" 473 url = "http://www.capstone-engine.org/" 474 475 supported = set(( 476 win32.ARCH_I386, 477 win32.ARCH_AMD64, 478 win32.ARCH_THUMB, 479 win32.ARCH_ARM, 480 win32.ARCH_ARM64, 481 )) 482 483 def _import_dependencies(self): 484 485 # Load the Capstone bindings. 486 global capstone 487 if capstone is None: 488 import capstone 489 490 # Load the constants for the requested architecture. 491 self.__constants = { 492 win32.ARCH_I386: 493 (capstone.CS_ARCH_X86, capstone.CS_MODE_32), 494 win32.ARCH_AMD64: 495 (capstone.CS_ARCH_X86, capstone.CS_MODE_64), 496 win32.ARCH_THUMB: 497 (capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB), 498 win32.ARCH_ARM: 499 (capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM), 500 win32.ARCH_ARM64: 501 (capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM), 502 } 503 504 # Test for the bug in early versions of Capstone. 505 # If found, warn the user about it. 506 try: 507 self.__bug = not isinstance( 508 capstone.cs_disasm_quick( 509 capstone.CS_ARCH_X86, capstone.CS_MODE_32, "\x90", 1)[0], 510 capstone.capstone.CsInsn) 511 except AttributeError: 512 self.__bug = False 513 if self.__bug: 514 warnings.warn( 515 "This version of the Capstone bindings is unstable," 516 " please upgrade to a newer one!", 517 RuntimeWarning, stacklevel=4) 518 519 520 def decode(self, address, code): 521 522 # Get the constants for the requested architecture. 523 arch, mode = self.__constants[self.arch] 524 525 # Get the decoder function outside the loop. 526 decoder = capstone.cs_disasm_quick 527 528 # If the buggy version of the bindings are being used, we need to catch 529 # all exceptions broadly. If not, we only need to catch CsError. 530 if self.__bug: 531 CsError = Exception 532 else: 533 CsError = capstone.CsError 534 535 # Create the variables for the instruction length, mnemonic and 536 # operands. That way they won't be created within the loop, 537 # minimizing the chances data might be overwritten. 538 # This only makes sense for the buggy vesion of the bindings, normally 539 # memory accesses are safe). 540 length = mnemonic = op_str = None 541 542 # For each instruction... 543 result = [] 544 offset = 0 545 while offset < len(code): 546 547 # Disassemble a single instruction, because disassembling multiple 548 # instructions may cause excessive memory usage (Capstone allocates 549 # approximately 1K of metadata per each decoded instruction). 550 instr = None 551 try: 552 instr = decoder( 553 arch, mode, code[offset:offset+16], address+offset, 1)[0] 554 except IndexError: 555 pass # No instructions decoded. 556 except CsError: 557 pass # Any other error. 558 559 # On success add the decoded instruction. 560 if instr is not None: 561 562 # Get the instruction length, mnemonic and operands. 563 # Copy the values quickly before someone overwrites them, 564 # if using the buggy version of the bindings (otherwise it's 565 # irrelevant in which order we access the properties). 566 length = instr.size 567 mnemonic = instr.mnemonic 568 op_str = instr.op_str 569 570 # Concatenate the mnemonic and the operands. 571 if op_str: 572 disasm = "%s %s" % (mnemonic, op_str) 573 else: 574 disasm = mnemonic 575 576 # Get the instruction bytes as a hexadecimal dump. 577 hexdump = HexDump.hexadecimal( code[offset:offset+length] ) 578 579 # On error add a "define constant" instruction. 580 # The exact instruction depends on the architecture. 581 else: 582 583 # The number of bytes to skip depends on the architecture. 584 # On Intel processors we'll skip one byte, since we can't 585 # really know the instruction length. On the rest of the 586 # architectures we always know the instruction length. 587 if self.arch in (win32.ARCH_I386, win32.ARCH_AMD64): 588 length = 1 589 else: 590 length = 4 591 592 # Get the skipped bytes as a hexadecimal dump. 593 skipped = code[offset:offset+length] 594 hexdump = HexDump.hexadecimal(skipped) 595 596 # Build the "define constant" instruction. 597 # On Intel processors it's "db". 598 # On ARM processors it's "dcb". 599 if self.arch in (win32.ARCH_I386, win32.ARCH_AMD64): 600 mnemonic = "db " 601 else: 602 mnemonic = "dcb " 603 bytes = [] 604 for b in skipped: 605 if b.isalpha(): 606 bytes.append("'%s'" % b) 607 else: 608 bytes.append("0x%x" % ord(b)) 609 op_str = ", ".join(bytes) 610 disasm = mnemonic + op_str 611 612 # Add the decoded instruction to the list. 613 result.append(( 614 address + offset, 615 length, 616 disasm, 617 hexdump, 618 )) 619 620 # Update the offset. 621 offset += length 622 623 # Return the list of decoded instructions. 624 return result 625 626#============================================================================== 627 628# TODO: use a lock to access __decoder 629# TODO: look in sys.modules for whichever disassembler is already loaded 630 631class Disassembler (object): 632 """ 633 Generic disassembler. Uses a set of adapters to decide which library to 634 load for which supported platform. 635 636 @type engines: tuple( L{Engine} ) 637 @cvar engines: Set of supported engines. If you implement your own adapter 638 you can add its class here to make it available to L{Disassembler}. 639 Supported disassemblers are: 640 """ 641 642 engines = ( 643 DistormEngine, # diStorm engine goes first for backwards compatibility 644 BeaEngine, 645 CapstoneEngine, 646 LibdisassembleEngine, 647 PyDasmEngine, 648 ) 649 650 # Add the list of supported disassemblers to the docstring. 651 __doc__ += "\n" 652 for e in engines: 653 __doc__ += " - %s - %s (U{%s})\n" % (e.name, e.desc, e.url) 654 del e 655 656 # Cache of already loaded disassemblers. 657 __decoder = {} 658 659 def __new__(cls, arch = None, engine = None): 660 """ 661 Factory class. You can't really instance a L{Disassembler} object, 662 instead one of the adapter L{Engine} subclasses is returned. 663 664 @type arch: str 665 @param arch: (Optional) Name of the processor architecture. 666 If not provided the current processor architecture is assumed. 667 For more details see L{win32.version._get_arch}. 668 669 @type engine: str 670 @param engine: (Optional) Name of the disassembler engine. 671 If not provided a compatible one is loaded automatically. 672 See: L{Engine.name} 673 674 @raise NotImplementedError: No compatible disassembler was found that 675 could decode machine code for the requested architecture. This may 676 be due to missing dependencies. 677 678 @raise ValueError: An unknown engine name was supplied. 679 """ 680 681 # Use the default architecture if none specified. 682 if not arch: 683 arch = win32.arch 684 685 # Return a compatible engine if none specified. 686 if not engine: 687 found = False 688 for clazz in cls.engines: 689 try: 690 if arch in clazz.supported: 691 selected = (clazz.name, arch) 692 try: 693 decoder = cls.__decoder[selected] 694 except KeyError: 695 decoder = clazz(arch) 696 cls.__decoder[selected] = decoder 697 return decoder 698 except NotImplementedError: 699 pass 700 msg = "No disassembler engine available for %s code." % arch 701 raise NotImplementedError(msg) 702 703 # Return the specified engine. 704 selected = (engine, arch) 705 try: 706 decoder = cls.__decoder[selected] 707 except KeyError: 708 found = False 709 engineLower = engine.lower() 710 for clazz in cls.engines: 711 if clazz.name.lower() == engineLower: 712 found = True 713 break 714 if not found: 715 msg = "Unsupported disassembler engine: %s" % engine 716 raise ValueError(msg) 717 if arch not in clazz.supported: 718 msg = "The %s engine cannot decode %s code." % selected 719 raise NotImplementedError(msg) 720 decoder = clazz(arch) 721 cls.__decoder[selected] = decoder 722 return decoder 723