1#!/usr/bin/python
# Tool to convert llvm-mc test files (*.s.cs) into fuzz corpus files under fuzz/corpus/. By Nguyen Anh Quynh, 2014
3import sys
4import os
5from capstone import *
6
7def test_file(fname):
8    print("Test %s" %fname);
9    f = open(fname)
10    lines = f.readlines()
11    f.close()
12
13    if not lines[0].startswith('# '):
14        print("ERROR: decoding information is missing")
15        return
16
17    # skip '# ' at the front, then split line to get out hexcode
18    # Note: option can be '', or 'None'
19    #print lines[0]
20    #print lines[0][2:].split(', ')
21    (arch, mode, option) = lines[0][2:].split(', ')
22    mode = mode.replace(' ', '')
23    option = option.strip()
24
25    archs = {
26        "CS_ARCH_ARM": CS_ARCH_ARM,
27        "CS_ARCH_ARM64": CS_ARCH_ARM64,
28        "CS_ARCH_MIPS": CS_ARCH_MIPS,
29        "CS_ARCH_PPC": CS_ARCH_PPC,
30        "CS_ARCH_SPARC": CS_ARCH_SPARC,
31        "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
32        "CS_ARCH_X86": CS_ARCH_X86,
33        "CS_ARCH_XCORE": CS_ARCH_XCORE,
34    }
35
36    modes = {
37        "CS_MODE_16": CS_MODE_16,
38        "CS_MODE_32": CS_MODE_32,
39        "CS_MODE_64": CS_MODE_64,
40        "CS_MODE_MIPS32": CS_MODE_MIPS32,
41        "CS_MODE_MIPS64": CS_MODE_MIPS64,
42        "0": CS_MODE_ARM,
43        "CS_MODE_ARM": CS_MODE_ARM,
44        "CS_MODE_THUMB": CS_MODE_THUMB,
45        "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
46        "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
47        "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
48        "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
49        "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
50        "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
51        "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
52        "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO,
53        "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
54        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
55        "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
56        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN,
57        "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN,
58        "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN,
59        "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN,
60    }
61
62    mc_modes = {
63        ("CS_ARCH_X86", "CS_MODE_32"): 0,
64        ("CS_ARCH_X86", "CS_MODE_64"): 1,
65        ("CS_ARCH_ARM", "CS_MODE_ARM"): 2,
66        ("CS_ARCH_ARM", "CS_MODE_THUMB"): 3,
67        ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): 4,
68        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): 5,
69        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): 6,
70        ("CS_ARCH_ARM64", "0"): 7,
71        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): 8,
72        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): 9,
73        ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): 10,
74        ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): 11,
75        ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): 12,
76        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 13,
77        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): 13,
78        ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): 14,
79        ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN"): 15,
80        ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN+CS_MODE_V9"): 16,
81        ("CS_ARCH_SYSZ", "0"): 17,
82        ("CS_ARCH_XCORE", "0"): 18,
83    }
84
85    #if not option in ('', 'None'):
86    #    print archs[arch], modes[mode], options[option]
87
88    for line in lines[1:]:
89        # ignore all the input lines having # in front.
90        if line.startswith('#'):
91            continue
92        #print("Check %s" %line)
93        code = line.split(' = ')[0]
94        if len(code) < 2:
95            continue
96        asm  = ''.join(line.split(' = ')[1:])
97        hex_code = code.replace('0x', '')
98        hex_code = hex_code.replace(',', '')
99        hex_data = hex_code.decode('hex')
100        fout = open("fuzz/corpus/%s_%s" % (os.path.basename(fname), hex_code), 'w')
101        if (arch, mode) not in mc_modes:
102            print "fail", arch, mode
103        fout.write(unichr(mc_modes[(arch, mode)]))
104        fout.write(hex_data)
105        fout.close()
106
107
if __name__ == '__main__':
    # Usage: pass one test file (e.g. <input-file.s.cs>) as the first
    # argument, or pass no arguments and supply one path per line on stdin.
    if len(sys.argv) != 1:
        test_file(sys.argv[1])
    else:
        for path in sys.stdin.readlines():
            test_file(path.strip())
116
117