1#!/usr/bin/python
2# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
3# by Nguyen Anh Quynh, 2019
4
5import sys
6
# All three input files are required: sys.argv[1] and sys.argv[2] are read
# further down, sys.argv[3] right here.  Checking only for "no arguments at
# all" would let a partial command line crash later with an IndexError.
if len(sys.argv) < 4:
    print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> <MappingInsnOp.inc>" %sys.argv[0])
    sys.exit(1)

# Lines of the existing mapping file; print_entry() searches them so that
# already-known entries keep their two data lines.
with open(sys.argv[3]) as f:
    mapping = f.readlines()

# Banner emitted at the top of the generated output.
print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
""")
19
20# lib/Target/X86/X86GenAsmMatcher.inc
21# static const MatchEntry MatchTable1[] = {
22#  { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },
23
24# extract insn from GenAsmMatcher Table
25# return (arch, mnem, insn_id)
def extract_insn(line):
    """Parse one MatchTable entry line into (arch, mnemonic, insn_id).

    Expected shape of *line* (already stripped by the caller):
        { 0 /* aaa */, X86::AAA, Convert_NoOperands, ... },

    The mnemonic is returned as written in the table, cut at the first
    '.' if it has one.  The arch is the part before '::', with any
    spelling of 'AArch64' replaced by 'ARM64'.
    """
    fields = line.split(',')
    qualified_id = fields[1].strip()

    # '{ 0 /* aaa */' -> the 4th space-separated token is the mnemonic
    mnemonic = fields[0].split(' ')[3]
    # X86 mov.s: keep only what precedes the first dot
    mnemonic = mnemonic.partition('.')[0]

    id_parts = qualified_id.split('::')
    # AArch64 -> ARM64
    arch = 'ARM64' if id_parts[0].upper() == 'AARCH64' else id_parts[0]
    return (arch, mnemonic, id_parts[1])
40
41
42
43# extract all insn lines from GenAsmMatcher
# return arch, first_insn, insn_id_list
def extract_matcher(filename):
    """Collect the instruction entries of MatchTable1 from a GenAsmMatcher file.

    Scans *filename* for the line containing
    'MatchEntry MatchTable1[] = {' and parses every following non-blank
    line with extract_insn() until the closing '};'.

    Returns a tuple (arch, first_insn, insn_id_list):
      arch         -- arch string of the first table entry, or None if no
                      entry was found
      first_insn   -- instruction id of the first table entry, or None
      insn_id_list -- dict mapping instruction id -> mnemonic; when an id
                      appears several times the first mnemonic wins
    """
    tables_seen = 0      # number of MatchTable1 header lines met so far
    entries_seen = 0     # number of entry lines parsed inside the table
    insn_id_list = {}
    arch = None
    first_insn = None

    # Stream the file inside a context manager: the handle is released even
    # if parsing raises, and nothing past the end of the table is read.
    with open(filename) as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            if 'MatchEntry MatchTable1[] = {' in line:
                tables_seen += 1
                continue

            # only lines belonging to the first MatchTable1 are of interest
            if tables_seen != 1:
                continue

            if line == '};':
                # end of the table
                break

            _arch, mnem, insn_id = extract_insn(line)
            entries_seen += 1
            if entries_seen == 1:
                # remember where the table starts
                arch, first_insn = _arch, insn_id

            if insn_id not in insn_id_list:
                insn_id_list[insn_id] = mnem

    return arch, first_insn, insn_id_list
87
88
89#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1])
arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
if arch is None:
    # extract_matcher() found no MatchTable1 entry; without an arch and a
    # first instruction id the rest of the script cannot work, so stop with
    # a readable message instead of an AttributeError on None.
    sys.exit("Error: no MatchTable1 entries found in %s" % sys.argv[1])
arch = arch.upper()
92
93#for line in insn_id_list:
94#    print(line)
95
96#{ /* X86_AAA, X86_INS_AAA: aaa */
97#  X86_EFLAGS_UNDEFINED_OF | X86_EFLAGS_UNDEFINED_SF | X86_EFLAGS_UNDEFINED_ZF | X86_EFLAGS_MODIFY_AF | X86_EFLAGS_UNDEFINED_PF | X86_EFLAGS_MODIFY_CF,
98#  { 0 }
99#},
def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print one mapping entry for instruction "<arch>_<insn_id>".

    *mapping* is a list of lines from an existing mapping file.  An
    existing entry is a line that starts with '{' and contains a comment
    whose first comma-separated item equals "<arch>_<insn_id>".  When such
    a line is found, the two lines that follow it are copied (stripped)
    into the printed entry:

      * mnem_can_be_wrong is false: the comment is regenerated from *mnem*
        as "<insn>, <arch>_INS_<mnem>: <mnem lowercased>".
      * mnem_can_be_wrong is true: *mnem* is ignored and the remainder of
        the old comment is reproduced instead.

    When no existing entry matches, a default entry with '0,' and '{ 0 }'
    is printed, its comment generated from *mnem*.

    Raises IndexError if a matching line is not followed by two more lines.
    """
    insn = "%s_%s" % (arch, insn_id)

    # first, try to find this entry in the existing mapping lines
    for i, entry in enumerate(mapping):
        if not (entry.startswith('{') and '/*' in entry):
            continue

        comment = entry.split('/*')[1].strip()
        # Split at the first comma only, so that any further commas in the
        # old comment survive when it is reproduced below.
        name, _, rest = comment.partition(',')
        if name.strip() != insn:
            continue

        body1 = mapping[i + 1].strip()
        body2 = mapping[i + 2].strip()
        if mnem_can_be_wrong:
            print("\n{\t/* %s, %s\n\t%s\n\t%s\n},"
                  % (insn, rest.strip(), body1, body2))
        else:
            print("\n{\t/* %s, %s_INS_%s: %s */\n\t%s\n\t%s\n},"
                  % (insn, arch, mnem, mnem.lower(), body1, body2))
        return

    # not found: emit a default entry
    print("\n{\t/* %s, %s_INS_%s: %s */\n\t0,\n\t{ 0 }\n},"
          % (insn, arch, mnem, mnem.lower()))
131
132
133
134# extract from GenInstrInfo.inc, because the insn id is in order
# Instruction ids whose mnemonic is fixed here; these are checked before the
# prefix rules and before the AsmMatcher table.
# NOTE(review): 'CMOVP_Fp3' looks like a truncated 'CMOVP_Fp32' (compare
# 'CMOVNP_Fp32'); left unchanged so the generated output stays the same --
# please confirm.
_FIXED_MNEM = {
    'BSWAP16r_BAD': 'BSWAP',
    'CMOVNP_Fp32': 'FCMOVNP',
    'CMOVP_Fp3': 'FCMOVP',
    'CMPSDrm_Int': 'CMPSD',
    'MOVSX16rm16': 'MOVSX',
    'MOVZX16rm16': 'MOVZX',
    'ST_Fp32m': 'FST',
    'CMOVNP_Fp64': 'FCMOVNU',
    'CMPSDrr_Int': 'CMPSD',
    'CMPSSrm_Int': 'CMPSS',
    'VCMPSDrm_Int': 'VCMPSD',
    'VCMPSSrm_Int': 'VCMPSS',
    'VPCMOVYrrr_REV': 'VPCMOV',
    'VRNDSCALESDZm': 'VRNDSCALESD',
    'VRNDSCALESSZm': 'VRNDSCALESS',
    'VMAXCPDZ128rm': 'VMAXPD',
    'VMAXCPSZ128rm': 'VMAXPS',
    'VMAXCSDZrm': 'VMAXSD',
    'VMAXCSSZrm': 'VMAXSS',
    'VMINCPDZ128rm': 'VMINPD',
    'VMINCPSZ128rm': 'VMINPS',
    'VMINCSDZrm': 'VMINSD',
    'VMINCSSZrm': 'VMINSS',
    'VMOV64toPQIZrm': 'VMOVQ',
    'VPERMIL2PDYrr_REV': 'VPERMILPD',
    'VPERMIL2PSYrr_REV': 'VPERMILPS',
    'VCVTSD2SI64Zrm_Int': 'VCVTSD2SI',
    'VCVTSD2SSrm_Int': 'VCVTSD2SS',
    'VCVTSS2SI64Zrm_Int': 'VCVTSS2SI',
    'VCVTTSD2SI64Zrm_Int': 'VCVTTSD2SI',
    'VCVTTSS2SI64Zrm_Int': 'VCVTTSS2SI',
}

# Id prefixes whose mnemonic is a leading slice of the id itself.  They are
# tried in this order with str.startswith(), so VFMSUBADD / VFMADDSUB must
# stay ahead of the shorter VFMSUB / VFMADD.
_FMA_PREFIXES = (
    'VFMSUBADD',
    'VFMADDSUB',
    'VFMADD',
    'VFMSUB',
    'VFNMADD',
    'VFNMSUB',
)


def _lookup_mnem(insn):
    """Return the trusted mnemonic for instruction id *insn*, or None.

    Sources, in priority order: the fixed table, the FMA prefix rules,
    then the AsmMatcher table (insn_id_list, upper-cased).  None means no
    source knows this id.
    """
    if insn in _FIXED_MNEM:
        return _FIXED_MNEM[insn]

    for prefix in _FMA_PREFIXES:
        if insn.startswith(prefix):
            # Prefix followed by a digit: keep prefix + 5 characters (same
            # length as e.g. 'VFMADD123PD'); otherwise keep prefix + 2
            # (same length as e.g. 'VFMADDPD').  A slice rather than an
            # index is used for the test so that an id equal to the bare
            # prefix does not raise IndexError.
            has_digit = insn[len(prefix):len(prefix) + 1].isdigit()
            return insn[:len(prefix) + (5 if has_digit else 2)]

    if insn in insn_id_list:
        # trust the AsmMatcher table
        return insn_id_list[insn].upper()

    return None


enum_count = 0       # number of 'enum {' lines met so far
meet_insn = False    # True once the line starting with first_insn was met
count = 0            # number of entries printed
last_mnem = None     # last trusted mnemonic, reused when an id is unknown

# Only the contents of the first 'enum {' block are processed, starting at
# the line that begins with first_insn and stopping at the line containing
# INSTRUCTION_LIST_END.
with open(sys.argv[2]) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue

        if line == 'enum {':
            enum_count += 1
            continue

        if enum_count != 1:
            continue

        if 'INSTRUCTION_LIST_END' in line:
            break

        insn = None
        if meet_insn:
            # enum item: '<NAME> = <value>,'
            insn = line.split('=')[0].strip()
            if 'CALLSTACK' in insn or 'TAILJUMP' in insn:
                # pseudo instruction
                insn = None
        elif line.startswith(first_insn):
            insn = line.split('=')[0].strip()
            meet_insn = True

        if not insn:
            continue

        count += 1
        mnem = _lookup_mnem(insn)
        if mnem is not None:
            last_mnem = mnem
            print_entry(arch.upper(), insn, mnem, mapping, False)
        else:
            # the last option when we cannot find mnem: use the last good mnem
            print_entry(arch.upper(), insn, last_mnem, mapping, True)
319