1#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
2#
3# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4# See https://llvm.org/LICENSE.txt for license information.
5# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6#
7#===------------------------------------------------------------------------===#
8
9from ctypes import CFUNCTYPE
10from ctypes import POINTER
11from ctypes import addressof
12from ctypes import c_byte
13from ctypes import c_char_p
14from ctypes import c_int
15from ctypes import c_size_t
16from ctypes import c_ubyte
17from ctypes import c_uint64
18from ctypes import c_void_p
19from ctypes import cast
20
21from .common import LLVMObject
22from .common import c_object_p
23from .common import get_library
24
# Names exported by ``from <this module> import *``.
__all__ = ['Disassembler']

# Handle to the LLVM shared library, as returned by get_library().
lib = get_library()

# Registry of ctypes callback prototypes, keyed by name. It is populated
# further down in this module, before register_library() is invoked.
callbacks = {}

# Constants for set_options
Option_UseMarkup = 1

# Flipped to True by _ensure_initialized() once the per-target initializers
# have been attempted, so that they only run once.
_initialized = False

# Target names whose LLVMInitialize<Target>* functions
# _ensure_initialized() looks up in the library.
_targets = [
    'AArch64',
    'ARM',
    'Hexagon',
    'MSP430',
    'Mips',
    'NVPTX',
    'PowerPC',
    'R600',
    'Sparc',
    'SystemZ',
    'X86',
    'XCore',
]
def _ensure_initialized():
    """Run the per-target LLVM initializers once per process.

    For every name in ``_targets`` this looks up the
    ``LLVMInitialize<Target>TargetInfo``, ``...TargetMC`` and
    ``...Disassembler`` symbols in ``lib`` and calls each one that exists.
    Symbols that the library does not provide are silently skipped.
    Subsequent calls are no-ops.
    """
    global _initialized
    if _initialized:
        return

    # Ideally this would call LLVMInitializeAll{TargetInfo,TargetMC,
    # Disassembler}s, but those are only defined as static inline functions
    # in the llvm-c headers and therefore are not exported as symbols by the
    # shared library. Until that changes, initialize each target by hand.
    for target in _targets:
        for component in ("TargetInfo", "TargetMC", "Disassembler"):
            symbol = "LLVMInitialize" + target + component
            init = getattr(lib, symbol, None)
            if init is not None:
                init()

    _initialized = True
56
57
class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to a specific "triple", which must be
    defined at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """

    # Capacity, in bytes, of the scratch buffer handed to
    # LLVMDisasmInstruction to receive the text of one instruction.
    _OUT_STR_SIZE = 255

    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'. Both text and byte strings
        are accepted.

        Raises Exception if the library returns a null disassembler for the
        triple.
        """

        _ensure_initialized()

        # c_char_p only accepts byte strings (or an address), so a text
        # triple has to be encoded first. Anything else is passed through
        # untouched so that existing callers see the same behavior.
        raw_triple = triple
        if isinstance(triple, str) and not isinstance(triple, bytes):
            raw_triple = triple.encode('utf-8')

        # No disassembler info block and no callbacks are supplied: the two
        # callback prototypes are instantiated from address 0 (null).
        ptr = lib.LLVMCreateDisasm(c_char_p(raw_triple), c_void_p(None),
                c_int(0), callbacks['op_info'](0),
                callbacks['symbol_lookup'](0))
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    @staticmethod
    def _as_bytes(source):
        """Return source in a form that c_char_p accepts.

        bytearray objects are copied into an immutable byte string. Text
        strings are encoded as latin-1, so each character with a code point
        below 256 maps to the byte of the same value. Every other object
        (including byte strings) is returned unchanged.
        """
        if isinstance(source, bytearray):
            return bytes(source)
        if isinstance(source, str) and not isinstance(source, bytes):
            return source.encode('latin-1')
        return source

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a byte string, a bytearray, or a text
        string whose characters each stand for one byte.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          string representation of instruction, as produced by the
            c_char_p output buffer. This will be the assembly that
            represents the instruction.
        """
        data = self._as_bytes(source)
        buf = cast(c_char_p(data), POINTER(c_ubyte))
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(data)),
                                           c_uint64(pc), out_str,
                                           self._OUT_STR_SIZE)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          string representation of instruction.
        """
        data = self._as_bytes(source)
        # Keep the c_char_p (and through it ``data``) referenced for the
        # lifetime of the generator so the addresses computed below stay
        # valid.
        source_bytes = c_char_p(data)
        size = len(data)
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        # Address of the first input byte; each instruction is decoded from
        # base + offset.
        base = cast(source_bytes, c_void_p).value
        offset = 0
        while offset < size:
            address = pc + offset
            cursor = cast(base + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, cursor,
                    c_uint64(size - offset), c_uint64(address),
                    out_str, self._OUT_STR_SIZE)

            # A zero length means no instruction could be read here.
            if result == 0:
                break

            yield (address, result, out_str.value)

            offset += result

    def set_options(self, options):
        """Set disassembler options.

        The options argument is an integer of option flags, such as
        Option_UseMarkup.

        Raises Exception if LLVMSetDisasmOptions returns a false value.
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)
146
147
def register_library(library):
    """Declare ctypes prototypes for the disassembler functions of library.

    Sets ``argtypes`` (and, where the original declaration did, ``restype``)
    on LLVMCreateDisasm, LLVMDisasmDispose, LLVMDisasmInstruction and
    LLVMSetDisasmOptions. The callback prototypes are read from the
    module-level ``callbacks`` registry, so it must already contain the
    'op_info' and 'symbol_lookup' entries when this is called.
    """
    create = library.LLVMCreateDisasm
    create.argtypes = [
        c_char_p,
        c_void_p,
        c_int,
        callbacks['op_info'],
        callbacks['symbol_lookup'],
    ]
    create.restype = c_object_p

    dispose = library.LLVMDisasmDispose
    dispose.argtypes = [Disassembler]

    disassemble = library.LLVMDisasmInstruction
    disassemble.argtypes = [
        Disassembler,
        POINTER(c_ubyte),
        c_uint64,
        c_uint64,
        c_char_p,
        c_size_t,
    ]
    disassemble.restype = c_size_t

    set_opts = library.LLVMSetDisasmOptions
    set_opts.argtypes = [Disassembler, c_uint64]
    set_opts.restype = c_int
161
162
# Function-pointer prototypes for the two callbacks LLVMCreateDisasm takes.
# They must be in the registry before register_library() runs, because it
# reads them to build the argtypes of LLVMCreateDisasm.
callbacks.update({
    'op_info': CFUNCTYPE(
        c_int, c_void_p, c_uint64, c_uint64, c_uint64, c_int, c_void_p
    ),
    'symbol_lookup': CFUNCTYPE(
        c_char_p, c_void_p, c_uint64, POINTER(c_uint64), c_uint64,
        POINTER(c_char_p)
    ),
})

# Install the prototypes on the library loaded at import time.
register_library(lib)
170