1#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
2#
3#                     The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9
10from ctypes import CFUNCTYPE
11from ctypes import POINTER
12from ctypes import addressof
13from ctypes import c_byte
14from ctypes import c_char_p
15from ctypes import c_int
16from ctypes import c_size_t
17from ctypes import c_ubyte
18from ctypes import c_uint64
19from ctypes import c_void_p
20from ctypes import cast
21
22from .common import LLVMObject
23from .common import c_object_p
24from .common import get_library
25
__all__ = ['Disassembler']

# Handle to the LLVM shared library, as returned by get_library().
lib = get_library()

# Maps a callback name to its ctypes function prototype. The entries are
# added further down in this module, before register_library() is called.
callbacks = {}

# Constants for set_options
Option_UseMarkup = 1

# Flipped to True by _ensure_initialized() once the initializers have run.
_initialized = False

# Target names used by _ensure_initialized() to build the names of the
# LLVMInitialize* functions it looks up in the library.
_targets = [
    'AArch64',
    'ARM',
    'Hexagon',
    'MSP430',
    'Mips',
    'NVPTX',
    'PowerPC',
    'R600',
    'Sparc',
    'SystemZ',
    'X86',
    'XCore',
]
def _ensure_initialized():
    """Run the per-target LLVM initializers the first time this is called.

    For every name in _targets, the TargetInfo, TargetMC and Disassembler
    initializer functions are looked up on the shared library and invoked.
    Functions that the library does not provide are skipped. Later calls
    return immediately.
    """
    global _initialized
    if _initialized:
        return

    # Here one would want to call the functions
    # LLVMInitializeAll{TargetInfo,TargetMC,Disassembler}s, but
    # unfortunately they are only defined as static inline
    # functions in the header files of llvm-c, so they don't exist
    # as symbols in the shared library.
    # So until that is fixed use this hack to initialize them all
    components = ("TargetInfo", "TargetMC", "Disassembler")
    for target in _targets:
        for component in components:
            symbol = "LLVMInitialize" + target + component
            try:
                initialize = getattr(lib, symbol)
            except AttributeError:
                # The loaded library has no such symbol; nothing to call.
                pass
            else:
                initialize()

    _initialized = True
57
58
class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to a specific "triple," which must be
    defined at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """

    # Size, in bytes, of the buffer handed to LLVMDisasmInstruction to
    # receive the textual form of a single instruction.
    _OUT_STR_SIZE = 255

    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'. Both byte strings and text
        strings are accepted.

        Raises Exception if the library does not return a disassembler for
        the triple.
        """

        _ensure_initialized()

        try:
            c_triple = c_char_p(triple)
        except TypeError:
            # c_char_p does not take text strings on Python 3. Encode them
            # instead of failing; anything else keeps raising TypeError.
            if not isinstance(triple, type(u'')):
                raise
            c_triple = c_char_p(triple.encode('utf-8'))

        # The two callbacks are passed as null function pointers.
        ptr = lib.LLVMCreateDisasm(c_triple, c_void_p(None), c_int(0),
                callbacks['op_info'](0), callbacks['symbol_lookup'](0))
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    @staticmethod
    def _wrap_source(source):
        """Return a (c_char_p, length) pair describing the bytes of source.

        Byte strings are wrapped directly. Objects that c_char_p rejects but
        that expose the buffer protocol (bytearray, memoryview, ...) are
        copied into a byte string first. Anything else raises TypeError.
        """
        try:
            pointer = c_char_p(source)
        except TypeError:
            source = memoryview(source).tobytes()
            pointer = c_char_p(source)
        # The length is taken after any conversion so that it always counts
        # the bytes the pointer refers to.
        return pointer, len(source)

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a byte string, a bytearray or another
        object exposing its bytes through the buffer protocol.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          str representation of instruction. This will be the assembly that
            represents the instruction.
        """
        source_bytes, size = self._wrap_source(source)
        buf = cast(source_bytes, POINTER(c_ubyte))
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(size),
                                           c_uint64(pc), out_str,
                                           self._OUT_STR_SIZE)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          str representation of instruction.
        """
        # source_bytes stays referenced for the lifetime of the generator,
        # which keeps the memory behind base_address alive.
        source_bytes, size = self._wrap_source(source)
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        base_address = cast(source_bytes, c_void_p).value
        offset = 0
        address = pc
        end_address = pc + size
        while address < end_address:
            b = cast(base_address + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                    c_uint64(size - offset), c_uint64(address),
                    out_str, self._OUT_STR_SIZE)

            # Zero bytes consumed: nothing more can be decoded from here.
            if result == 0:
                break

            yield (address, result, out_str.value)

            address += result
            offset += result

    def set_options(self, options):
        """Pass options to LLVMSetDisasmOptions for this disassembler.

        options is an integer; see the module-level Option_* constants.

        Raises Exception if the library call returns a false value.
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)
147
148
def register_library(library):
    """Declare the ctypes prototypes of the disassembler functions.

    Sets argtypes and restype on the LLVMCreateDisasm, LLVMDisasmDispose,
    LLVMDisasmInstruction and LLVMSetDisasmOptions attributes of library.
    The callback prototypes are read from the module-level callbacks
    mapping, so 'op_info' and 'symbol_lookup' must be present in it before
    this function is called.
    """
    library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
        callbacks['op_info'], callbacks['symbol_lookup']]
    library.LLVMCreateDisasm.restype = c_object_p

    library.LLVMDisasmDispose.argtypes = [Disassembler]
    # The C function returns void. Without this, ctypes would assume an int
    # return value and read one that the function never produced.
    library.LLVMDisasmDispose.restype = None

    library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
            c_uint64, c_uint64, c_char_p, c_size_t]
    library.LLVMDisasmInstruction.restype = c_size_t

    library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
    library.LLVMSetDisasmOptions.restype = c_int
162
163
# Function prototypes for the two callback parameters of LLVMCreateDisasm.
# NOTE(review): presumably these mirror LLVMOpInfoCallback and
# LLVMSymbolLookupCallback from llvm-c/Disassembler.h -- confirm against the
# header in use.
callbacks.update({
    'op_info': CFUNCTYPE(
        c_int,
        c_void_p, c_uint64, c_uint64, c_uint64, c_int, c_void_p),
    'symbol_lookup': CFUNCTYPE(
        c_char_p,
        c_void_p, c_uint64, POINTER(c_uint64), c_uint64, POINTER(c_char_p)),
})

# Must run after the prototypes above exist: register_library() reads them
# from callbacks.
register_library(lib)
171