#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

from ctypes import CFUNCTYPE
from ctypes import POINTER
from ctypes import addressof
from ctypes import c_byte
from ctypes import c_char_p
from ctypes import c_int
from ctypes import c_size_t
from ctypes import c_ubyte
from ctypes import c_uint64
from ctypes import c_void_p
from ctypes import cast

from .common import LLVMObject
from .common import c_object_p
from .common import get_library

__all__ = [
    'Disassembler',
]

lib = get_library()

# Maps a callback name ('op_info', 'symbol_lookup') to its ctypes function
# prototype. Populated near the bottom of this module, before
# register_library() is invoked.
callbacks = {}

# Constants for set_options
Option_UseMarkup = 1

# Size, in bytes, of the scratch buffer that receives the textual form of a
# single disassembled instruction.
_OUT_STR_SIZE = 255

_initialized = False
_targets = ['AArch64', 'ARM', 'Hexagon', 'MSP430', 'Mips', 'NVPTX', 'PowerPC',
            'R600', 'Sparc', 'SystemZ', 'X86', 'XCore']


def _ensure_initialized():
    """Run the per-target LLVM initializers once per process.

    For every name in ``_targets`` this looks up
    ``LLVMInitialize<Target>{TargetInfo,TargetMC,Disassembler}`` on the
    loaded library and calls each symbol that exists. Missing symbols are
    skipped silently.
    """
    global _initialized
    if _initialized:
        return

    # Here one would want to call the functions
    # LLVMInitializeAll{TargetInfo,TargetMC,Disassembler}s, but
    # unfortunately they are only defined as static inline
    # functions in the header files of llvm-c, so they don't exist
    # as symbols in the shared library.
    # So until that is fixed use this hack to initialize them all
    for tgt in _targets:
        for initializer in ("TargetInfo", "TargetMC", "Disassembler"):
            try:
                f = getattr(lib, "LLVMInitialize" + tgt + initializer)
            except AttributeError:
                # This target component is not present in the library.
                continue
            f()
    _initialized = True


def _as_byte_string(data):
    """Return *data* in a form that ``c_char_p`` accepts.

    ``c_char_p`` rejects ``bytearray`` instances, so those are copied into an
    immutable byte string. Any other value is returned unchanged so that
    ``c_char_p`` applies its usual conversion or type check.
    """
    if isinstance(data, bytearray):
        return bytes(data)
    return data


class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to a specific "triple," which must be
    defined at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """
    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'. It may be given either as a
        byte string or as text; text is encoded as UTF-8 before being handed
        to the library.

        Raises Exception if the library returns a null disassembler for the
        triple.
        """

        _ensure_initialized()

        # c_char_p only accepts byte strings, so encode text triples first.
        # Anything without an encode() method is passed through untouched.
        raw_triple = triple
        if not isinstance(raw_triple, bytes) and hasattr(raw_triple, 'encode'):
            raw_triple = raw_triple.encode('utf-8')

        # No disassembler info and no callbacks are supplied: both callback
        # prototypes are instantiated from address 0 (null function pointers).
        ptr = lib.LLVMCreateDisasm(c_char_p(raw_triple), c_void_p(None),
                c_int(0), callbacks['op_info'](0),
                callbacks['symbol_lookup'](0))
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a byte string or bytearray, i.e. something
        that represents a sequence of bytes.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          str representation of instruction. This will be the assembly that
            represents the instruction, as read back from the c_char_p
            output buffer.
        """
        data = _as_byte_string(source)
        buf = cast(c_char_p(data), POINTER(c_ubyte))
        out_str = cast((c_byte * _OUT_STR_SIZE)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(data)),
                                           c_uint64(pc), out_str,
                                           _OUT_STR_SIZE)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          str representation of instruction.
        """
        data = _as_byte_string(source)
        source_bytes = c_char_p(data)
        out_str = cast((c_byte * _OUT_STR_SIZE)(), c_char_p)
        total = len(data)

        # View the input as a fixed-size array so that its base address can
        # be taken once and advanced by plain integer arithmetic.
        buf = cast(source_bytes, POINTER(c_ubyte * total)).contents
        base = addressof(buf)

        offset = 0
        while offset < total:
            cursor = cast(base + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, cursor,
                    c_uint64(total - offset), c_uint64(pc + offset),
                    out_str, _OUT_STR_SIZE)

            # The library consumed nothing, so stop at this offset.
            if result == 0:
                break

            yield (pc + offset, result, out_str.value)

            offset += result

    def set_options(self, options):
        """Apply the given option bits (e.g. Option_UseMarkup).

        Raises Exception when LLVMSetDisasmOptions returns a false value.
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)


def register_library(library):
    """Declare ctypes argument and return types for the disassembler API.

    Sets ``argtypes``/``restype`` on the LLVMCreateDisasm, LLVMDisasmDispose,
    LLVMDisasmInstruction and LLVMSetDisasmOptions attributes of *library*.
    The 'op_info' and 'symbol_lookup' entries of ``callbacks`` must already
    be defined when this is called.
    """
    library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
        callbacks['op_info'], callbacks['symbol_lookup']]
    library.LLVMCreateDisasm.restype = c_object_p

    library.LLVMDisasmDispose.argtypes = [Disassembler]

    library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
        c_uint64, c_uint64, c_char_p, c_size_t]
    library.LLVMDisasmInstruction.restype = c_size_t

    library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
    library.LLVMSetDisasmOptions.restype = c_int


# Function prototypes for the two callbacks accepted by LLVMCreateDisasm.
callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
                                 c_int, c_void_p)
callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
                                       POINTER(c_uint64), c_uint64,
                                       POINTER(c_char_p))

register_library(lib)