1# Capstone Python bindings, by Nguyen Anh Quynnh <aquynh@gmail.com>
2import os, sys
3from platform import system
4_python2 = sys.version_info[0] < 3
5if _python2:
6    range = xrange
7
# Names exported by "from capstone import *".
__all__ = [
    'Cs',
    'CsInsn',

    'cs_disasm_quick',
    'cs_disasm_lite',
    'cs_version',
    'cs_support',
    'version_bind',
    'debug',

    'CS_API_MAJOR',
    'CS_API_MINOR',

    'CS_VERSION_MAJOR',
    'CS_VERSION_MINOR',
    'CS_VERSION_EXTRA',

    'CS_ARCH_ARM',
    'CS_ARCH_ARM64',
    'CS_ARCH_MIPS',
    'CS_ARCH_X86',
    'CS_ARCH_PPC',
    'CS_ARCH_SPARC',
    'CS_ARCH_SYSZ',
    'CS_ARCH_XCORE',
    'CS_ARCH_M68K',
    'CS_ARCH_TMS320C64X',
    'CS_ARCH_M680X',
    'CS_ARCH_EVM',
    'CS_ARCH_ALL',

    'CS_MODE_LITTLE_ENDIAN',
    'CS_MODE_BIG_ENDIAN',
    'CS_MODE_16',
    'CS_MODE_32',
    'CS_MODE_64',
    'CS_MODE_ARM',
    'CS_MODE_THUMB',
    'CS_MODE_MCLASS',
    'CS_MODE_MICRO',
    'CS_MODE_MIPS3',
    'CS_MODE_MIPS32R6',
    'CS_MODE_MIPS2',
    'CS_MODE_V8',
    'CS_MODE_V9',
    'CS_MODE_QPX',
    'CS_MODE_M68K_000',
    'CS_MODE_M68K_010',
    'CS_MODE_M68K_020',
    'CS_MODE_M68K_030',
    'CS_MODE_M68K_040',
    'CS_MODE_M68K_060',
    'CS_MODE_MIPS32',
    'CS_MODE_MIPS64',
    'CS_MODE_M680X_6301',
    'CS_MODE_M680X_6309',
    'CS_MODE_M680X_6800',
    'CS_MODE_M680X_6801',
    'CS_MODE_M680X_6805',
    'CS_MODE_M680X_6808',
    'CS_MODE_M680X_6809',
    'CS_MODE_M680X_6811',
    'CS_MODE_M680X_CPU12',
    'CS_MODE_M680X_HCS08',

    'CS_OPT_SYNTAX',
    'CS_OPT_SYNTAX_DEFAULT',
    'CS_OPT_SYNTAX_INTEL',
    'CS_OPT_SYNTAX_ATT',
    'CS_OPT_SYNTAX_NOREGNAME',
    'CS_OPT_SYNTAX_MASM',

    'CS_OPT_DETAIL',
    'CS_OPT_MODE',
    'CS_OPT_ON',
    'CS_OPT_OFF',

    'CS_ERR_OK',
    'CS_ERR_MEM',
    'CS_ERR_ARCH',
    'CS_ERR_HANDLE',
    'CS_ERR_CSH',
    'CS_ERR_MODE',
    'CS_ERR_OPTION',
    'CS_ERR_DETAIL',
    'CS_ERR_VERSION',
    'CS_ERR_MEMSETUP',
    'CS_ERR_DIET',
    'CS_ERR_SKIPDATA',
    'CS_ERR_X86_ATT',
    'CS_ERR_X86_INTEL',
    # exported alongside the other X86 syntax error codes so callers can
    # compare CsError.errno against it after a star-import
    'CS_ERR_X86_MASM',

    'CS_SUPPORT_DIET',
    'CS_SUPPORT_X86_REDUCE',
    'CS_SKIPDATA_CALLBACK',

    'CS_OP_INVALID',
    'CS_OP_REG',
    'CS_OP_IMM',
    'CS_OP_MEM',
    'CS_OP_FP',

    'CS_GRP_INVALID',
    'CS_GRP_JUMP',
    'CS_GRP_CALL',
    'CS_GRP_RET',
    'CS_GRP_INT',
    'CS_GRP_IRET',
    'CS_GRP_PRIVILEGE',

    'CS_AC_INVALID',
    'CS_AC_READ',
    'CS_AC_WRITE',

    'CsError',

    '__version__',
]
127
128# Capstone C interface
129
130# API version
131CS_API_MAJOR = 4
132CS_API_MINOR = 0
133
134# Package version
135CS_VERSION_MAJOR = CS_API_MAJOR
136CS_VERSION_MINOR = CS_API_MINOR
137CS_VERSION_EXTRA = 2
138
139__version__ = "%u.%u.%u" %(CS_VERSION_MAJOR, CS_VERSION_MINOR, CS_VERSION_EXTRA)
140
# architectures: identifiers passed as the @arch argument when opening an engine
CS_ARCH_ARM = 0
CS_ARCH_ARM64 = 1
CS_ARCH_MIPS = 2
CS_ARCH_X86 = 3
CS_ARCH_PPC = 4
CS_ARCH_SPARC = 5
CS_ARCH_SYSZ = 6
CS_ARCH_XCORE = 7
CS_ARCH_M68K = 8
CS_ARCH_TMS320C64X = 9
CS_ARCH_M680X = 10
CS_ARCH_EVM = 11
CS_ARCH_MAX = 12        # one past the last architecture id above (not an architecture itself)
CS_ARCH_ALL = 0xFFFF    # also the base value of the cs_support() query ids
156
# disasm mode
# NOTE: the same bit value is reused by different architectures (for example
# CS_MODE_THUMB, CS_MODE_MICRO, CS_MODE_V9 and CS_MODE_QPX are all 1 << 4),
# so a mode value can only be interpreted together with its architecture.
CS_MODE_LITTLE_ENDIAN = 0      # little-endian mode (default mode)
CS_MODE_ARM = 0                # ARM mode
CS_MODE_16 = (1 << 1)          # 16-bit mode (for X86)
CS_MODE_32 = (1 << 2)          # 32-bit mode (for X86)
CS_MODE_64 = (1 << 3)          # 64-bit mode (for X86, PPC)
CS_MODE_THUMB = (1 << 4)       # ARM's Thumb mode, including Thumb-2
CS_MODE_MCLASS = (1 << 5)      # ARM's Cortex-M series
CS_MODE_V8 = (1 << 6)          # ARMv8 A32 encodings for ARM
CS_MODE_MICRO = (1 << 4)       # MicroMips mode (MIPS architecture)
CS_MODE_MIPS3 = (1 << 5)       # Mips III ISA
CS_MODE_MIPS32R6 = (1 << 6)    # Mips32r6 ISA
CS_MODE_MIPS2 = (1 << 7)       # Mips II ISA
CS_MODE_V9 = (1 << 4)          # Sparc V9 mode (for Sparc)
CS_MODE_QPX = (1 << 4)         # Quad Processing eXtensions mode (PPC)
CS_MODE_M68K_000 = (1 << 1)    # M68K 68000 mode
CS_MODE_M68K_010 = (1 << 2)    # M68K 68010 mode
CS_MODE_M68K_020 = (1 << 3)    # M68K 68020 mode
CS_MODE_M68K_030 = (1 << 4)    # M68K 68030 mode
CS_MODE_M68K_040 = (1 << 5)    # M68K 68040 mode
CS_MODE_M68K_060 = (1 << 6)    # M68K 68060 mode
CS_MODE_BIG_ENDIAN = (1 << 31) # big-endian mode
CS_MODE_MIPS32 = CS_MODE_32    # Mips32 ISA
CS_MODE_MIPS64 = CS_MODE_64    # Mips64 ISA
CS_MODE_M680X_6301 = (1 << 1)  # M680X HD6301/3 mode
CS_MODE_M680X_6309 = (1 << 2)  # M680X HD6309 mode
CS_MODE_M680X_6800 = (1 << 3)  # M680X M6800/2 mode
CS_MODE_M680X_6801 = (1 << 4)  # M680X M6801/3 mode
CS_MODE_M680X_6805 = (1 << 5)  # M680X M6805 mode
CS_MODE_M680X_6808 = (1 << 6)  # M680X M68HC08 mode
CS_MODE_M680X_6809 = (1 << 7)  # M680X M6809 mode
CS_MODE_M680X_6811 = (1 << 8)  # M680X M68HC11 mode
CS_MODE_M680X_CPU12 = (1 << 9)  # M680X CPU12 mode
CS_MODE_M680X_HCS08 = (1 << 10)  # M680X HCS08 mode
191
# Capstone option type (second argument of cs_option())
CS_OPT_SYNTAX = 1    # Assembly output syntax (value is one of CS_OPT_SYNTAX_*)
CS_OPT_DETAIL = 2    # Break down instruction structure into details
CS_OPT_MODE = 3      # Change engine's mode at run-time
CS_OPT_MEM = 4       # NOTE(review): previous comment duplicated CS_OPT_MODE's; presumably user-defined memory management - confirm against capstone.h
CS_OPT_SKIPDATA = 5  # Skip data when disassembling
CS_OPT_SKIPDATA_SETUP = 6      # Setup user-defined function for SKIPDATA option
CS_OPT_MNEMONIC = 7  # Customize instruction mnemonic
CS_OPT_UNSIGNED = 8  # Print immediate in unsigned form

# Capstone option value
CS_OPT_OFF = 0             # Turn OFF an option - default option of CS_OPT_DETAIL
CS_OPT_ON = 3              # Turn ON an option (CS_OPT_DETAIL, CS_OPT_SKIPDATA)
205
# Common instruction operand types - to be consistent across all architectures.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP  = 4

# Common instruction groups - to be consistent across all architectures.
CS_GRP_INVALID = 0  # uninitialized/invalid group.
CS_GRP_JUMP    = 1  # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL    = 2  # all call instructions
CS_GRP_RET     = 3  # all return instructions
CS_GRP_INT     = 4  # all interrupt instructions (int+syscall)
CS_GRP_IRET    = 5  # all interrupt return instructions
CS_GRP_PRIVILEGE = 6  # all privileged instructions

# Access types for instruction operands (bit flags).
CS_AC_INVALID  = 0        # Invalid/uninitialized access type.
CS_AC_READ     = (1 << 0) # Operand that is read from.
CS_AC_WRITE    = (1 << 1) # Operand that is written to.
226
# Capstone syntax value (values accepted for the CS_OPT_SYNTAX option)
CS_OPT_SYNTAX_DEFAULT = 0    # Default assembly syntax of all platforms (CS_OPT_SYNTAX)
CS_OPT_SYNTAX_INTEL = 1    # Intel X86 asm syntax - default syntax on X86 (CS_OPT_SYNTAX, CS_ARCH_X86)
CS_OPT_SYNTAX_ATT = 2      # ATT asm syntax (CS_OPT_SYNTAX, CS_ARCH_X86)
CS_OPT_SYNTAX_NOREGNAME = 3   # Asm syntax prints register name with only number - (CS_OPT_SYNTAX, CS_ARCH_PPC, CS_ARCH_ARM)
CS_OPT_SYNTAX_MASM = 4      # MASM syntax (CS_OPT_SYNTAX, CS_ARCH_X86)
233
# Capstone error type (carried by CsError.errno)
CS_ERR_OK = 0      # No error: everything was fine
CS_ERR_MEM = 1     # Out-Of-Memory error: cs_open(), cs_disasm()
CS_ERR_ARCH = 2    # Unsupported architecture: cs_open()
CS_ERR_HANDLE = 3  # Invalid handle: cs_op_count(), cs_op_index()
CS_ERR_CSH = 4     # Invalid csh argument: cs_close(), cs_errno(), cs_option()
CS_ERR_MODE = 5    # Invalid/unsupported mode: cs_open()
CS_ERR_OPTION = 6  # Invalid/unsupported option: cs_option()
CS_ERR_DETAIL = 7  # Information is unavailable because the detail option is OFF
CS_ERR_MEMSETUP = 8 # NOTE(review): presumably memory management not set up (cf. CS_OPT_MEM) - confirm against capstone.h
CS_ERR_VERSION = 9 # Unsupported version (bindings)
CS_ERR_DIET = 10   # Information irrelevant in diet engine
CS_ERR_SKIPDATA = 11 # Access irrelevant data for "data" instruction in SKIPDATA mode
CS_ERR_X86_ATT = 12 # X86 AT&T syntax is unsupported (opt-out at compile time)
CS_ERR_X86_INTEL = 13 # X86 Intel syntax is unsupported (opt-out at compile time)
CS_ERR_X86_MASM = 14 # X86 MASM syntax is unsupported (opt-out at compile time)
250
# query id for cs_support(); numbered right after CS_ARCH_ALL
CS_SUPPORT_DIET = CS_ARCH_ALL + 1      # is this a diet engine?
CS_SUPPORT_X86_REDUCE = CS_ARCH_ALL+2  # is this engine compiled with the X86-reduce option?
254
# Capstone reverse lookup: constant value -> constant name, one table per prefix.
# NOTE: several constants share a value (e.g. CS_MODE_LITTLE_ENDIAN and
# CS_MODE_ARM are both 0), so a table keeps only one name per value.
def _reverse_lookup(prefix, namespace):
    """Map value -> name for every entry of *namespace* whose name starts with *prefix*."""
    table = {}
    for name, value in namespace.items():
        if name.startswith(prefix):
            table[value] = name
    return table

CS_AC    = _reverse_lookup('CS_AC_', locals())
CS_ARCH  = _reverse_lookup('CS_ARCH_', locals())
CS_ERR   = _reverse_lookup('CS_ERR_', locals())
CS_GRP   = _reverse_lookup('CS_GRP_', locals())
CS_MODE  = _reverse_lookup('CS_MODE_', locals())
CS_OP    = _reverse_lookup('CS_OP_', locals())
CS_OPT   = _reverse_lookup('CS_OPT_', locals())
263
264import ctypes, ctypes.util
265from os.path import split, join, dirname
266import distutils.sysconfig
267import pkg_resources
268
269import inspect
# Some environments import this module without giving it a __file__ attribute;
# fall back to the file backing the current frame so the library search
# (which derives a directory from __file__) still works.
if not hasattr(sys.modules[__name__], '__file__'):
    __file__ = inspect.getfile(inspect.currentframe())
272
273if sys.platform == 'darwin':
274    _lib = "libcapstone.dylib"
275elif sys.platform in ('win32', 'cygwin'):
276    _lib = "capstone.dll"
277else:
278    _lib = "libcapstone.so"
279
280_found = False
281
def _load_lib(path):
    """Try to load the Capstone shared library from *path*.

    path: directory to look in.  The empty string means "global load": the
        bare library name is handed to the platform loader so that its own
        search path is used, without first requiring the file to exist
        relative to the current directory.

    Returns the loaded ctypes library object, or None when nothing loadable
    was found at this location (so the caller can try the next one).
    """
    lib_file = join(path, _lib)
    candidates = [lib_file]
    if lib_file.endswith('.so'):
        # if we're on linux, try again with .so.4 extension
        candidates.append(lib_file + '.4')

    for candidate in candidates:
        if path and not os.path.exists(candidate):
            continue
        try:
            return ctypes.cdll.LoadLibrary(candidate)
        except OSError:
            # The file is missing from the loader's search path, or present
            # but unusable (wrong architecture, truncated, ...).  Keep going
            # instead of aborting the whole search on the first bad candidate.
            continue
    return None
292
# handle to the loaded Capstone shared library (set by the search below)
_cs = None

# Loading attempts, in order
# - user-provided environment variable
# - pkg_resources can get us the path to the local libraries
# - we can get the path to the local libraries by parsing our filename
# - global load
# - python's lib directory
# - last-gasp attempt at some hardcoded paths on darwin and linux

_path_list = [
    os.getenv('LIBCAPSTONE_PATH', None),
    pkg_resources.resource_filename(__name__, 'lib'),
    join(split(__file__)[0], 'lib'),
    '',
    distutils.sysconfig.get_python_lib(),
    "/usr/local/lib/" if sys.platform == 'darwin' else '/usr/lib64',
]

for _path in _path_list:
    # the environment variable entry is None when it is not set
    if _path is not None:
        _cs = _load_lib(_path)
        if _cs is not None:
            break

# still None here means that every location above failed
if _cs is None:
    raise ImportError("ERROR: fail to load the dynamic library.")
316
317
318# low-level structure for C code
319
def copy_ctypes(src):
    """Returns a new ctypes object which is a bitwise copy of an existing one"""
    ctype = type(src)
    clone = ctype()
    # raw byte copy of the whole object into the freshly created instance
    ctypes.memmove(ctypes.byref(clone), ctypes.byref(src), ctypes.sizeof(ctype))
    return clone
325
326def copy_ctypes_list(src):
327    return [copy_ctypes(n) for n in src]
328
329# Weird import placement because these modules are needed by the below code but need the above functions
330from . import arm, arm64, m68k, mips, ppc, sparc, systemz, x86, xcore, tms320c64x, m680x, evm
331
class _cs_arch(ctypes.Union):
    # Union of the per-architecture detail structures; CsInsn reads only the
    # member that matches the architecture of its engine.
    _fields_ = (
        ('arm64', arm64.CsArm64),
        ('arm', arm.CsArm),
        ('m68k', m68k.CsM68K),
        ('mips', mips.CsMips),
        ('x86', x86.CsX86),
        ('ppc', ppc.CsPpc),
        ('sparc', sparc.CsSparc),
        ('sysz', systemz.CsSysz),
        ('xcore', xcore.CsXcore),
        ('tms320c64x', tms320c64x.CsTMS320C64x),
        ('m680x', m680x.CsM680x),
        ('evm', evm.CsEvm),
    )
347
class _cs_detail(ctypes.Structure):
    # Low-level detail record of one instruction.  Each array is paired with
    # a *_count field telling how many leading entries are in use.
    _fields_ = (
        ('regs_read', ctypes.c_uint16 * 12),    # registers implicitly read
        ('regs_read_count', ctypes.c_ubyte),
        ('regs_write', ctypes.c_uint16 * 20),   # registers implicitly modified
        ('regs_write_count', ctypes.c_ubyte),
        ('groups', ctypes.c_ubyte * 8),         # groups the instruction belongs to
        ('groups_count', ctypes.c_ubyte),
        ('arch', _cs_arch),                     # architecture-specific part
    )
358
class _cs_insn(ctypes.Structure):
    # Low-level record of one disassembled instruction, as filled in by cs_disasm().
    _fields_ = (
        ('id', ctypes.c_uint),              # instruction id; CsInsn treats 0 as a SKIPDATA "data" instruction
        ('address', ctypes.c_uint64),
        ('size', ctypes.c_uint16),          # number of valid entries in @bytes
        ('bytes', ctypes.c_ubyte * 16),
        ('mnemonic', ctypes.c_char * 32),
        ('op_str', ctypes.c_char * 160),
        ('detail', ctypes.POINTER(_cs_detail)),  # CsInsn dereferences this only when detail mode is on
    )
369
# callback for SKIPDATA option: a C function returning size_t and taking
# (char *, size_t, size_t, void *).
# NOTE(review): presumably (code buffer, buffer size, offset, user data) -> number
# of bytes to skip - confirm against capstone.h
CS_SKIPDATA_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, ctypes.c_size_t, ctypes.c_void_p)
372
class _cs_opt_skipdata(ctypes.Structure):
    # Argument block passed (by pointer) with the CS_OPT_SKIPDATA_SETUP option.
    _fields_ = (
        ('mnemonic', ctypes.c_char_p),          # mnemonic for skipped data (Cs defaults it to ".byte")
        ('callback', CS_SKIPDATA_CALLBACK),     # user callback
        ('user_data', ctypes.c_void_p),         # opaque pointer stored next to the callback
    )
379
class _cs_opt_mnem(ctypes.Structure):
    # Pairs an instruction id with a mnemonic string.
    # NOTE(review): presumably the argument block of CS_OPT_MNEMONIC - confirm against its user.
    _fields_ = (
        ('id', ctypes.c_uint),
        ('mnemonic', ctypes.c_char_p),
    )
385
386# setup all the function prototype
387def _setup_prototype(lib, fname, restype, *argtypes):
388    getattr(lib, fname).restype = restype
389    getattr(lib, fname).argtypes = argtypes
390
# Prototypes of the library functions used below: name, return type, argument types.
# The engine handle (csh) is declared as size_t, or pointer-to-size_t where
# the function receives it by reference (cs_open, cs_close).
_setup_prototype(_cs, "cs_open", ctypes.c_int, ctypes.c_uint, ctypes.c_uint, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_disasm", ctypes.c_size_t, ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, \
        ctypes.c_uint64, ctypes.c_size_t, ctypes.POINTER(ctypes.POINTER(_cs_insn)))
_setup_prototype(_cs, "cs_free", None, ctypes.c_void_p, ctypes.c_size_t)
_setup_prototype(_cs, "cs_close", ctypes.c_int, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_reg_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_insn_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_group_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_op_count", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint)
_setup_prototype(_cs, "cs_op_index", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint, ctypes.c_uint)
_setup_prototype(_cs, "cs_errno", ctypes.c_int, ctypes.c_size_t)
_setup_prototype(_cs, "cs_option", ctypes.c_int, ctypes.c_size_t, ctypes.c_int, ctypes.c_void_p)
_setup_prototype(_cs, "cs_version", ctypes.c_int, ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int))
_setup_prototype(_cs, "cs_support", ctypes.c_bool, ctypes.c_int)
_setup_prototype(_cs, "cs_strerror", ctypes.c_char_p, ctypes.c_int)
_setup_prototype(_cs, "cs_regs_access", ctypes.c_int, ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.POINTER(ctypes.c_uint16*64), ctypes.POINTER(ctypes.c_uint8), ctypes.POINTER(ctypes.c_uint16*64), ctypes.POINTER(ctypes.c_uint8))
407
408
409# access to error code via @errno of CsError
class CsError(Exception):
    """Error reported by the Capstone engine; the CS_ERR_* code is in @errno."""

    def __init__(self, errno):
        self.errno = errno

    def __str__(self):
        # the library hands back a byte string: native str on Python 2,
        # needs decoding on Python 3
        message = _cs.cs_strerror(self.errno)
        if _python2:
            return message
        return message.decode()
421
422
423# return the core's version
def cs_version():
    """Return the core library's version as a (major, minor, combined) tuple."""
    major, minor = ctypes.c_int(), ctypes.c_int()
    # major/minor are filled in through the pointers; the call itself
    # returns the combined value
    combined = _cs.cs_version(ctypes.byref(major), ctypes.byref(minor))
    return (major.value, minor.value, combined)
429
430
431# return the binding's version
def version_bind():
    """Return the binding's version as a (major, minor, combined) tuple."""
    # combined value: major in the bits above the low byte, plus minor
    combined = (CS_API_MAJOR << 8) + CS_API_MINOR
    return (CS_API_MAJOR, CS_API_MINOR, combined)
434
435
def cs_support(query):
    """Ask the core library whether it supports @query.

    @query is an architecture id (CS_ARCH_*) or one of the CS_SUPPORT_* ids.
    """
    supported = _cs.cs_support(query)
    return supported
438
439
440# dummy class resembling Cs class, just for cs_disasm_quick()
441# this class only need to be referenced to via 2 fields: @csh & @arch
class _dummy_cs(object):
    """Minimal stand-in for Cs, handed to the CsInsn objects of cs_disasm_quick().

    It carries exactly the attributes CsInsn reads from its engine:
    @csh, @arch, @_detail and @_diet.
    """

    def __init__(self, csh, arch):
        self.csh = csh
        self.arch = arch
        # cs_disasm_quick() never turns the detail option on
        self._detail = False
        # CsInsn checks _diet before returning mnemonic, op_str and names;
        # without this attribute those accessors fail with AttributeError.
        self._diet = cs_support(CS_SUPPORT_DIET)
447
448
449# Quick & dirty Python function to disasm raw binary code
450# This function return CsInsn objects
451# NOTE: you might want to use more efficient Cs class & its methods.
def cs_disasm_quick(arch, mode, code, offset, count=0):
    """Disassemble @code in one call, yielding CsInsn objects.

    arch, mode: CS_ARCH_* / CS_MODE_* values used to open a temporary engine.
    code: raw machine code.
    offset: address given to the first instruction.
    count: value handed to cs_disasm() as the instruction count (default 0).

    Raises CsError if the core's version differs from the binding's, if the
    engine cannot be opened or closed, or if disassembly reports an error.

    The temporary engine is closed in every case: when nothing is decoded,
    when an error is raised, and when the caller stops iterating early.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = cs_version()
    if major != CS_API_MAJOR or minor != CS_API_MINOR:
        # our binding version is different from the core's API version
        raise CsError(CS_ERR_VERSION)

    csh = ctypes.c_size_t()
    status = _cs.cs_open(arch, mode, ctypes.byref(csh))
    if status != CS_ERR_OK:
        raise CsError(status)

    completed = False
    try:
        all_insn = ctypes.POINTER(_cs_insn)()
        res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                # one stand-in engine is enough for all instructions of this call
                cs = _dummy_cs(csh, arch)
                for i in range(res):
                    yield CsInsn(cs, all_insn[i])
            finally:
                _cs.cs_free(all_insn, res)
        else:
            status = _cs.cs_errno(csh)
            if status != CS_ERR_OK:
                raise CsError(status)
        completed = True
    finally:
        status = _cs.cs_close(ctypes.byref(csh))
        # report a close failure only when it cannot mask an earlier error
        # (or the GeneratorExit of an abandoned generator)
        if completed and status != CS_ERR_OK:
            raise CsError(status)
482
483
484# Another quick, but lighter function to disasm raw binary code.
485# This function is faster than cs_disasm_quick() around 20% because
486# cs_disasm_lite() only return tuples of (address, size, mnemonic, op_str),
487# rather than CsInsn objects.
488# NOTE: you might want to use more efficient Cs class & its methods.
def cs_disasm_lite(arch, mode, code, offset, count=0):
    """Disassemble @code in one call, yielding (address, size, mnemonic, op_str) tuples.

    arch, mode: CS_ARCH_* / CS_MODE_* values used to open a temporary engine.
    code: raw machine code.
    offset: address given to the first instruction.
    count: value handed to cs_disasm() as the instruction count (default 0).

    Raises CsError if the core's version differs from the binding's, if the
    core is a diet engine (no mnemonic/op_str available), if the engine cannot
    be opened or closed, or if disassembly reports an error.

    The temporary engine is closed in every case: when nothing is decoded,
    when an error is raised, and when the caller stops iterating early.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = cs_version()
    if major != CS_API_MAJOR or minor != CS_API_MINOR:
        # our binding version is different from the core's API version
        raise CsError(CS_ERR_VERSION)

    if cs_support(CS_SUPPORT_DIET):
        # Diet engine cannot provide @mnemonic & @op_str
        raise CsError(CS_ERR_DIET)

    csh = ctypes.c_size_t()
    status = _cs.cs_open(arch, mode, ctypes.byref(csh))
    if status != CS_ERR_OK:
        raise CsError(status)

    completed = False
    try:
        all_insn = ctypes.POINTER(_cs_insn)()
        res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                for i in range(res):
                    insn = all_insn[i]
                    yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
            finally:
                _cs.cs_free(all_insn, res)
        else:
            status = _cs.cs_errno(csh)
            if status != CS_ERR_OK:
                raise CsError(status)
        completed = True
    finally:
        status = _cs.cs_close(ctypes.byref(csh))
        # report a close failure only when it cannot mask an earlier error
        # (or the GeneratorExit of an abandoned generator)
        if completed and status != CS_ERR_OK:
            raise CsError(status)
524
525def _ascii_name_or_default(name, default):
526    return default if name is None else name.decode('ascii')
527
528
529# Python-style class to disasm code
class CsInsn(object):
    """Python-style view of one disassembled instruction.

    The object keeps its own copy of the low-level instruction record and
    exposes it through properties and helper methods.  Architecture-specific
    attributes (operands and friends) are decoded on first access and are
    only available when the owning engine has the detail option enabled.
    """

    def __init__(self, cs, all_info):
        """Copy the low-level record @all_info produced by engine @cs.

        @cs must offer the attributes csh, arch, _detail and _diet.
        """
        self._raw = copy_ctypes(all_info)
        self._cs = cs
        if self._cs._detail and self._raw.id != 0:
            # save detail: the copied record still points at the source's
            # detail structure, so give this object a private copy of it
            self._raw.detail = ctypes.pointer(all_info.detail._type_())
            ctypes.memmove(ctypes.byref(self._raw.detail[0]), ctypes.byref(all_info.detail[0]), ctypes.sizeof(type(all_info.detail[0])))

    def __repr__(self):
        # format the bytes by hand: bytearray.hex() does not exist on Python 2
        hex_bytes = ''.join('%02x' % b for b in self.bytes)
        return '<CsInsn 0x%x [%s]: %s %s>' % (self.address, hex_bytes, self.mnemonic, self.op_str)

    # return instruction's ID.
    @property
    def id(self):
        return self._raw.id

    # return instruction's address.
    @property
    def address(self):
        return self._raw.address

    # return instruction's size.
    @property
    def size(self):
        return self._raw.size

    # return instruction's machine bytes (which should have @size bytes).
    @property
    def bytes(self):
        return bytearray(self._raw.bytes)[:self._raw.size]

    # return instruction's mnemonic.
    @property
    def mnemonic(self):
        if self._cs._diet:
            # Diet engine cannot provide @mnemonic.
            raise CsError(CS_ERR_DIET)

        return self._raw.mnemonic.decode('ascii')

    # return instruction's operands (in string).
    @property
    def op_str(self):
        if self._cs._diet:
            # Diet engine cannot provide @op_str.
            raise CsError(CS_ERR_DIET)

        return self._raw.op_str.decode('ascii')

    # return list of all implicit registers being read.
    @property
    def regs_read(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @regs_read.
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            return self._raw.detail.contents.regs_read[:self._raw.detail.contents.regs_read_count]

        raise CsError(CS_ERR_DETAIL)

    # return list of all implicit registers being modified
    @property
    def regs_write(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @regs_write
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            return self._raw.detail.contents.regs_write[:self._raw.detail.contents.regs_write_count]

        raise CsError(CS_ERR_DETAIL)

    # return list of semantic groups this instruction belongs to.
    @property
    def groups(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide @groups
            raise CsError(CS_ERR_DIET)

        if self._cs._detail:
            return self._raw.detail.contents.groups[:self._raw.detail.contents.groups_count]

        raise CsError(CS_ERR_DETAIL)

    # decode the architecture-specific detail into instance attributes
    def __gen_detail(self):
        if self._raw.id == 0:
            # do nothing in skipdata mode
            return

        arch = self._cs.arch
        if arch == CS_ARCH_ARM:
            (self.usermode, self.vector_size, self.vector_data, self.cps_mode, self.cps_flag, self.cc, self.update_flags, \
            self.writeback, self.mem_barrier, self.operands) = arm.get_arch_info(self._raw.detail.contents.arch.arm)
        elif arch == CS_ARCH_ARM64:
            (self.cc, self.update_flags, self.writeback, self.operands) = \
                arm64.get_arch_info(self._raw.detail.contents.arch.arm64)
        elif arch == CS_ARCH_X86:
            (self.prefix, self.opcode, self.rex, self.addr_size, \
                self.modrm, self.sib, self.disp, \
                self.sib_index, self.sib_scale, self.sib_base, self.xop_cc, self.sse_cc, \
                self.avx_cc, self.avx_sae, self.avx_rm, self.eflags, \
                self.modrm_offset, self.disp_offset, self.disp_size, self.imm_offset, self.imm_size, \
                self.operands) = x86.get_arch_info(self._raw.detail.contents.arch.x86)
        elif arch == CS_ARCH_M68K:
            (self.operands, self.op_size) = m68k.get_arch_info(self._raw.detail.contents.arch.m68k)
        elif arch == CS_ARCH_MIPS:
            self.operands = mips.get_arch_info(self._raw.detail.contents.arch.mips)
        elif arch == CS_ARCH_PPC:
            (self.bc, self.bh, self.update_cr0, self.operands) = \
                ppc.get_arch_info(self._raw.detail.contents.arch.ppc)
        elif arch == CS_ARCH_SPARC:
            (self.cc, self.hint, self.operands) = sparc.get_arch_info(self._raw.detail.contents.arch.sparc)
        elif arch == CS_ARCH_SYSZ:
            (self.cc, self.operands) = systemz.get_arch_info(self._raw.detail.contents.arch.sysz)
        elif arch == CS_ARCH_XCORE:
            (self.operands) = xcore.get_arch_info(self._raw.detail.contents.arch.xcore)
        elif arch == CS_ARCH_TMS320C64X:
            (self.condition, self.funit, self.parallel, self.operands) = tms320c64x.get_arch_info(self._raw.detail.contents.arch.tms320c64x)
        elif arch == CS_ARCH_M680X:
            (self.flags, self.operands) = m680x.get_arch_info(self._raw.detail.contents.arch.m680x)
        elif arch == CS_ARCH_EVM:
            (self.pop, self.push, self.fee) = evm.get_arch_info(self._raw.detail.contents.arch.evm)

    # called only for attributes not found the normal way: resolve the
    # architecture-specific ones, decoding the detail on first use
    def __getattr__(self, name):
        if name.startswith('_'):
            # Architecture attributes are all public names.  Refusing private
            # and dunder names here keeps a lookup of _cs / _raw on a
            # not-yet-initialised object (bare instance, copy) from
            # re-entering this method without bound.
            raise AttributeError(name)

        if not self._cs._detail:
            raise CsError(CS_ERR_DETAIL)

        _dict = self.__dict__
        if 'operands' not in _dict:
            self.__gen_detail()
        if name not in _dict:
            if self._raw.id == 0:
                raise CsError(CS_ERR_SKIPDATA)
            raise AttributeError(name)
        return _dict[name]

    # get the last error code
    def errno(self):
        return _cs.cs_errno(self._cs.csh)

    # get the register name, given the register ID
    def reg_name(self, reg_id, default=None):
        if self._cs._diet:
            # Diet engine cannot provide register name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_reg_name(self._cs.csh, reg_id), default)

    # get the instruction name
    def insn_name(self, default=None):
        if self._cs._diet:
            # Diet engine cannot provide instruction name
            raise CsError(CS_ERR_DIET)

        if self._raw.id == 0:
            return default

        return _ascii_name_or_default(_cs.cs_insn_name(self._cs.csh, self.id), default)

    # get the group name
    def group_name(self, group_id, default=None):
        if self._cs._diet:
            # Diet engine cannot provide group name
            raise CsError(CS_ERR_DIET)

        return _ascii_name_or_default(_cs.cs_group_name(self._cs.csh, group_id), default)

    # verify if this insn belong to group with id as @group_id
    def group(self, group_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide group information
            raise CsError(CS_ERR_DIET)

        return group_id in self.groups

    # verify if this instruction implicitly read register @reg_id
    def reg_read(self, reg_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide regs_read information
            raise CsError(CS_ERR_DIET)

        return reg_id in self.regs_read

    # verify if this instruction implicitly modified register @reg_id
    def reg_write(self, reg_id):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        if self._cs._diet:
            # Diet engine cannot provide regs_write information
            raise CsError(CS_ERR_DIET)

        return reg_id in self.regs_write

    # return number of operands having same operand type @op_type
    def op_count(self, op_type):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        return sum(1 for op in self.operands if op.type == op_type)

    # get the operand at position @position (counted from 1) of all operands
    # having the same type @op_type; None when there is no such operand
    def op_find(self, op_type, position):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        c = 0
        for op in self.operands:
            if op.type == op_type:
                c += 1
                # compare only after a match, so that @position 0 cannot
                # select an operand of a different type
                if c == position:
                    return op
        return None

    # Return (list-of-registers-read, list-of-registers-modified) by this instructions.
    # This includes all the implicit & explicit registers.
    def regs_access(self):
        if self._raw.id == 0:
            raise CsError(CS_ERR_SKIPDATA)

        regs_read = (ctypes.c_uint16 * 64)()
        regs_read_count = ctypes.c_uint8()
        regs_write = (ctypes.c_uint16 * 64)()
        regs_write_count = ctypes.c_uint8()

        status = _cs.cs_regs_access(self._cs.csh, self._raw, ctypes.byref(regs_read), ctypes.byref(regs_read_count), ctypes.byref(regs_write), ctypes.byref(regs_write_count))
        if status != CS_ERR_OK:
            raise CsError(status)

        # keep only the entries in use; an empty side is reported as ()
        if regs_read_count.value > 0:
            regs_read = regs_read[:regs_read_count.value]
        else:
            regs_read = ()

        if regs_write_count.value > 0:
            regs_write = regs_write[:regs_write_count.value]
        else:
            regs_write = ()

        return (regs_read, regs_write)
795
796
797
798class Cs(object):
799    def __init__(self, arch, mode):
800        # verify version compatibility with the core before doing anything
801        (major, minor, _combined) = cs_version()
802        if major != CS_API_MAJOR or minor != CS_API_MINOR:
803            self.csh = None
804            # our binding version is different from the core's API version
805            raise CsError(CS_ERR_VERSION)
806
807        self.arch, self._mode = arch, mode
808        self.csh = ctypes.c_size_t()
809        status = _cs.cs_open(arch, mode, ctypes.byref(self.csh))
810        if status != CS_ERR_OK:
811            self.csh = None
812            raise CsError(status)
813
814        try:
815            import ccapstone
816            # rewire disasm to use the faster version
817            self.disasm = ccapstone.Cs(self).disasm
818        except:
819            pass
820
821        if arch == CS_ARCH_X86:
822            # Intel syntax is default for X86
823            self._syntax = CS_OPT_SYNTAX_INTEL
824        else:
825            self._syntax = None
826
827        self._detail = False  # by default, do not produce instruction details
828        self._imm_unsigned = False  # by default, print immediate operands as signed numbers
829        self._diet = cs_support(CS_SUPPORT_DIET)
830        self._x86reduce = cs_support(CS_SUPPORT_X86_REDUCE)
831
832        # default mnemonic for SKIPDATA
833        self._skipdata_mnem = ".byte"
834        self._skipdata_cb = (None, None)
835        # store reference to option object to avoid it being freed
836        # because C code uses it by reference
837        self._skipdata_opt = _cs_opt_skipdata()
838        self._skipdata = False
839
840
841
842    # destructor to be called automatically when object is destroyed.
843    def __del__(self):
844        if self.csh:
845            try:
846                status = _cs.cs_close(ctypes.byref(self.csh))
847                if status != CS_ERR_OK:
848                    raise CsError(status)
849            except: # _cs might be pulled from under our feet
850                pass
851
852
853    # def option(self, opt_type, opt_value):
854    #    return _cs.cs_option(self.csh, opt_type, opt_value)
855
856
857    # is this a diet engine?
    @property
    def diet(self):
        """Answer of cs_support(CS_SUPPORT_DIET), recorded when the engine was created."""
        return self._diet
861
862
863    # is this engine compiled with X86-reduce option?
    @property
    def x86_reduce(self):
        """Answer of cs_support(CS_SUPPORT_X86_REDUCE), recorded when the engine was created."""
        return self._x86reduce
867
868
869    # return assembly syntax.
870    @property
871    def syntax(self):
872        return self._syntax
873
874
875    # syntax setter: modify assembly syntax.
876    @syntax.setter
877    def syntax(self, style):
878        status = _cs.cs_option(self.csh, CS_OPT_SYNTAX, style)
879        if status != CS_ERR_OK:
880            raise CsError(status)
881        # save syntax
882        self._syntax = style
883
884
885    # return current skipdata status
886    @property
887    def skipdata(self):
888        return self._skipdata
889
890
891    # setter: modify skipdata status
892    @skipdata.setter
893    def skipdata(self, opt):
894        if opt == False:
895            status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA, CS_OPT_OFF)
896        else:
897            status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA, CS_OPT_ON)
898        if status != CS_ERR_OK:
899            raise CsError(status)
900
901        # save this option
902        self._skipdata = opt
903
904
905    @property
906    def skipdata_setup(self):
907        return (self._skipdata_mnem,) + self._skipdata_cb
908
909
910    @skipdata_setup.setter
911    def skipdata_setup(self, opt):
912        _mnem, _cb, _ud = opt
913        self._skipdata_opt.mnemonic = _mnem.encode()
914        self._skipdata_opt.callback = CS_SKIPDATA_CALLBACK(_cb or 0)
915        self._skipdata_opt.user_data = ctypes.cast(_ud, ctypes.c_void_p)
916        status = _cs.cs_option(self.csh, CS_OPT_SKIPDATA_SETUP, ctypes.cast(ctypes.byref(self._skipdata_opt), ctypes.c_void_p))
917        if status != CS_ERR_OK:
918            raise CsError(status)
919
920        self._skipdata_mnem = _mnem
921        self._skipdata_cb = (_cb, _ud)
922
923
924    @property
925    def skipdata_mnem(self):
926        return self._skipdata_mnem
927
928
929    @skipdata_mnem.setter
930    def skipdata_mnem(self, mnem):
931        self.skipdata_setup = (mnem,) + self._skipdata_cb
932
933
934    @property
935    def skipdata_callback(self):
936        return self._skipdata_cb
937
938
939    @skipdata_callback.setter
940    def skipdata_callback(self, val):
941        if not isinstance(val, tuple):
942            val = (val, None)
943        func, data = val
944        self.skipdata_setup = (self._skipdata_mnem, func, data)
945
946
947    # customize instruction mnemonic
948    def mnemonic_setup(self, id, mnem):
949        _mnem_opt = _cs_opt_mnem()
950        _mnem_opt.id = id
951        if mnem:
952            _mnem_opt.mnemonic = mnem.encode()
953        else:
954            _mnem_opt.mnemonic = mnem
955        status = _cs.cs_option(self.csh, CS_OPT_MNEMONIC, ctypes.cast(ctypes.byref(_mnem_opt), ctypes.c_void_p))
956        if status != CS_ERR_OK:
957            raise CsError(status)
958
959
960    # check to see if this engine supports a particular arch,
961    # or diet mode (depending on @query).
962    def support(self, query):
963        return cs_support(query)
964
965
966    # is detail mode enable?
967    @property
968    def detail(self):
969        return self._detail
970
971
972    # modify detail mode.
973    @detail.setter
974    def detail(self, opt):  # opt is boolean type, so must be either 'True' or 'False'
975        if opt == False:
976            status = _cs.cs_option(self.csh, CS_OPT_DETAIL, CS_OPT_OFF)
977        else:
978            status = _cs.cs_option(self.csh, CS_OPT_DETAIL, CS_OPT_ON)
979        if status != CS_ERR_OK:
980            raise CsError(status)
981        # save detail
982        self._detail = opt
983
984
985    # is detail mode enable?
986    @property
987    def imm_unsigned(self):
988        return self._imm_unsigned
989
990
991    # modify detail mode.
992    @imm_unsigned.setter
993    def imm_unsigned(self, opt):  # opt is boolean type, so must be either 'True' or 'False'
994        if opt == False:
995            status = _cs.cs_option(self.csh, CS_OPT_UNSIGNED, CS_OPT_OFF)
996        else:
997            status = _cs.cs_option(self.csh, CS_OPT_UNSIGNED, CS_OPT_ON)
998        if status != CS_ERR_OK:
999            raise CsError(status)
1000        # save detail
1001        self._imm_unsigned = opt
1002
1003
1004    # return disassembly mode of this engine.
1005    @property
1006    def mode(self):
1007        return self._mode
1008
1009
1010    # modify engine's mode at run-time.
1011    @mode.setter
1012    def mode(self, opt):  # opt is new disasm mode, of int type
1013        status = _cs.cs_option(self.csh, CS_OPT_MODE, opt)
1014        if status != CS_ERR_OK:
1015            raise CsError(status)
1016        # save mode
1017        self._mode = opt
1018
1019    # get the last error code
1020    def errno(self):
1021        return _cs.cs_errno(self.csh)
1022
1023    # get the register name, given the register ID
1024    def reg_name(self, reg_id, default=None):
1025        if self._diet:
1026            # Diet engine cannot provide register name
1027            raise CsError(CS_ERR_DIET)
1028
1029        return _ascii_name_or_default(_cs.cs_reg_name(self.csh, reg_id), default)
1030
1031    # get the instruction name, given the instruction ID
1032    def insn_name(self, insn_id, default=None):
1033        if self._diet:
1034            # Diet engine cannot provide instruction name
1035            raise CsError(CS_ERR_DIET)
1036
1037        return _ascii_name_or_default(_cs.cs_insn_name(self.csh, insn_id), default)
1038
1039    # get the group name
1040    def group_name(self, group_id, default=None):
1041        if self._diet:
1042            # Diet engine cannot provide group name
1043            raise CsError(CS_ERR_DIET)
1044
1045        return _ascii_name_or_default(_cs.cs_group_name(self.csh, group_id), default)
1046
1047    # Disassemble binary & return disassembled instructions in CsInsn objects
1048    def disasm(self, code, offset, count=0):
1049        all_insn = ctypes.POINTER(_cs_insn)()
1050        '''if not _python2:
1051            print(code)
1052            code = code.encode()
1053            print(code)'''
1054        # Pass a bytearray by reference
1055        size = len(code)
1056        if isinstance(code, bytearray):
1057            code = ctypes.byref(ctypes.c_char.from_buffer(code))
1058        res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
1059        if res > 0:
1060            try:
1061                for i in range(res):
1062                    yield CsInsn(self, all_insn[i])
1063            finally:
1064                _cs.cs_free(all_insn, res)
1065        else:
1066            status = _cs.cs_errno(self.csh)
1067            if status != CS_ERR_OK:
1068                raise CsError(status)
1069            return
1070            yield
1071
1072
1073    # Light function to disassemble binary. This is about 20% faster than disasm() because
1074    # unlike disasm(), disasm_lite() only return tuples of (address, size, mnemonic, op_str),
1075    # rather than CsInsn objects.
1076    def disasm_lite(self, code, offset, count=0):
1077        if self._diet:
1078            # Diet engine cannot provide @mnemonic & @op_str
1079            raise CsError(CS_ERR_DIET)
1080
1081        all_insn = ctypes.POINTER(_cs_insn)()
1082        size = len(code)
1083        # Pass a bytearray by reference
1084        if isinstance(code, bytearray):
1085            code = ctypes.byref(ctypes.c_char.from_buffer(code))
1086        res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
1087        if res > 0:
1088            try:
1089                for i in range(res):
1090                    insn = all_insn[i]
1091                    yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
1092            finally:
1093                _cs.cs_free(all_insn, res)
1094        else:
1095            status = _cs.cs_errno(self.csh)
1096            if status != CS_ERR_OK:
1097                raise CsError(status)
1098            return
1099            yield
1100
1101
# Build and return a one-line string describing this binding and the core
# library: "python-<diet|standard>[-<arch>...]-c<core major>.<core minor>-b<API major>.<API minor>".
# If the Cython module is importable its own debug() result is returned
# instead.
def debug():
    # is Cython there?
    try:
        from . import ccapstone
        return ccapstone.debug()
    except Exception:
        # no (working) Cython, fallback to Python code below.
        # Deliberately broad, but KeyboardInterrupt/SystemExit still propagate.
        pass

    diet = "diet" if cs_support(CS_SUPPORT_DIET) else "standard"

    # X86 is not in this table: it is appended last, together with its
    # optional "_reduce" suffix.
    archs = {
        "arm": CS_ARCH_ARM, "arm64": CS_ARCH_ARM64, "m68k": CS_ARCH_M68K,
        "mips": CS_ARCH_MIPS, "ppc": CS_ARCH_PPC, "sparc": CS_ARCH_SPARC,
        "sysz": CS_ARCH_SYSZ, 'xcore': CS_ARCH_XCORE, "tms320c64x": CS_ARCH_TMS320C64X,
        "m680x": CS_ARCH_M680X, 'evm': CS_ARCH_EVM,
    }

    # supported architectures, in alphabetical order
    all_archs = "".join("-%s" % k for k in sorted(archs) if cs_support(archs[k]))

    if cs_support(CS_ARCH_X86):
        all_archs += "-x86"
        if cs_support(CS_SUPPORT_X86_REDUCE):
            all_archs += "_reduce"

    (major, minor, _combined) = cs_version()

    return "python-%s%s-c%u.%u-b%u.%u" % (diet, all_archs, major, minor, CS_API_MAJOR, CS_API_MINOR)
1136