1#!/usr/bin/python
2# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
3# by Nguyen Anh Quynh, 2019
4
5import sys
6
# All three input files are required: sys.argv[1], sys.argv[2] and
# sys.argv[3] are each opened by this script, so anything less than
# 4 argv items would otherwise end in an IndexError.
if len(sys.argv) < 4:
    print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> MappingInsn.inc" %sys.argv[0])
    sys.exit(1)

# Lines of the existing MappingInsn.inc; print_entry() searches them to
# carry over the per-instruction details that were already written there.
with open(sys.argv[3]) as f:
    mapping = f.readlines()

# File header of the generated output (everything is written to stdout).
print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
""")
19
20# lib/Target/X86/X86GenAsmMatcher.inc
21# static const MatchEntry MatchTable1[] = {
22#  { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },
23
24# extract insn from GenAsmMatcher Table
25# return (arch, mnem, insn_id)
def extract_insn(line):
    """Split one MatchTable entry into (arch, mnemonic, insn_id).

    `line` is a table row such as
        { 0 /* aaa */, X86::AAA, Convert_NoOperands, ... },
    The mnemonic is the 4th space-separated token of the first
    comma-separated field (the text inside the /* */ comment), cut at the
    first '.', and the second field supplies `<arch>::<insn_id>`.
    """
    fields = line.split(',')
    qualified_id = fields[1].strip()

    # X86 mov.s: only the part before the first '.' is the mnemonic
    mnemonic = fields[0].split(' ')[3].split('.')[0]

    pieces = qualified_id.split('::')
    # AArch64 -> ARM64
    arch_name = 'ARM64' if pieces[0].upper() == 'AARCH64' else pieces[0]
    return (arch_name, mnemonic, pieces[1])
40
41
42
# extract all insn entries of MatchTable1 from GenAsmMatcher
# return arch, first_insn, insn_id_list
def extract_matcher(filename):
    """Collect the instruction entries of MatchTable1 from a GenAsmMatcher file.

    Only the lines between the 'MatchEntry MatchTable1[] = {' header and the
    closing '};' are parsed, each one through extract_insn().

    Returns a tuple (arch, first_insn, insn_id_list):
      arch          architecture reported for the first table entry,
                    or None when the table has no entries
      first_insn    instruction id of the first table entry, or None
      insn_id_list  dict mapping instruction id -> mnemonic, keeping the
                    mnemonic of the first entry seen for each id
    """
    tables_seen = 0
    seen_first_entry = False
    insn_id_list = {}
    arch = None
    first_insn = None

    # the context manager closes the file even when a malformed entry
    # makes extract_insn() raise
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            if 'MatchEntry MatchTable1[] = {' in line:
                tables_seen += 1
                continue

            # entries are only read while exactly one table header has
            # been seen
            if tables_seen != 1:
                continue

            if line == '};':
                # done with the match table
                break

            _arch, mnem, insn_id = extract_insn(line)
            if not seen_first_entry:
                # the first entry decides the architecture and marks where
                # the instruction ids start
                seen_first_entry = True
                arch, first_insn = _arch, insn_id

            if insn_id not in insn_id_list:
                insn_id_list[insn_id] = mnem

    return arch, first_insn, insn_id_list
87
88
# Parse the matcher table once; the generation loop below needs the
# architecture name, the first instruction id and the id -> mnemonic map.
arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
if arch is None:
    # extract_matcher() found no MatchTable1 entry, so there is nothing to
    # generate; fail with a message instead of an AttributeError on None.
    sys.stderr.write("Error: no MatchTable1 entries found in %s\n" % sys.argv[1])
    sys.exit(1)
arch = arch.upper()
92
93#for line in insn_id_list:
94#    print(line)
95
96
97#{
98#        X86_AAA, X86_INS_AAA,
99##ifndef CAPSTONE_DIET
100#        { 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0
101##endif
102#},
def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print one MappingInsn.inc entry for instruction `insn_id` to stdout.

    arch               architecture prefix used in the emitted identifiers
    insn_id            instruction id (e.g. 'AAA'); the entry is flagged as
                       64-bit ('1') when the id contains the substring '64'
    mnem               mnemonic used to build '<arch>_INS_<mnem>'
    mapping            lines of the old MappingInsn.inc file
    mnem_can_be_wrong  when true and the instruction exists in `mapping`,
                       the instruction name recorded there is emitted
                       instead of one built from `mnem`

    If `mapping` has a line whose first comma-separated field is
    '<ARCH>_<insn_id>', the details line found two lines further down is
    carried over. Otherwise (or when the old file ends before that details
    line) an entry with empty details is printed.
    """
    template = "\n{\n\t%s_%s, %s, %s,\n#ifndef CAPSTONE_DIET\n\t%s\n#endif\n},"
    insn = "%s_%s" % (arch.upper(), insn_id)
    is64bit = '1' if '64' in insn_id else '0'

    # first, try to find this entry in old MappingInsn.inc file
    for i, old_line in enumerate(mapping):
        tmp = old_line.split(',')
        if tmp[0].strip() != insn:
            continue

        if i + 2 >= len(mapping):
            # the old file stops before the details line of this entry;
            # no later line can have one either, so emit the default entry
            break

        if mnem_can_be_wrong:
            # `mnem` is only a guess here (and may even be None), so trust
            # the name stored in the old file
            insn_name = tmp[1].strip()
        else:
            insn_name = "%s_INS_%s" % (arch, mnem)

        print(template % (arch, insn_id, insn_name, is64bit, mapping[i + 2].strip()))
        return

    # not in the old file: emit an entry with empty details
    print(template % (arch, insn_id, "%s_INS_%s" % (arch, mnem), is64bit,
                      "{ 0 }, { 0 }, { 0 }, 0, 0"))
146
147
# extract from GenInstrInfo.inc, because the insn id is in order

# Instruction ids whose mnemonic is pinned by hand. A hit here takes
# precedence over the FMA prefixes and over the matcher table.
SPECIAL_MNEMONICS = {
    'BSWAP16r_BAD': 'BSWAP',
    'CMOVNP_Fp32': 'FCMOVNP',
    # NOTE(review): 'CMOVP_Fp3' looks truncated next to 'CMOVNP_Fp32' -
    # confirm the intended instruction id before changing it
    'CMOVP_Fp3': 'FCMOVP',
    'CMPSDrm_Int': 'CMPSD',
    'MOVSX16rm16': 'MOVSX',
    'MOVZX16rm16': 'MOVZX',
    'ST_Fp32m': 'FST',
    'CMOVNP_Fp64': 'FCMOVNU',
    'CMPSDrr_Int': 'CMPSD',
    'CMPSSrm_Int': 'CMPSS',
    'VCMPSDrm_Int': 'VCMPSD',
    'VCMPSSrm_Int': 'VCMPSS',
    'VPCMOVYrrr_REV': 'VPCMOV',
    'VRNDSCALESDZm': 'VRNDSCALESD',
    'VRNDSCALESSZm': 'VRNDSCALESS',
    'VMAXCPDZ128rm': 'VMAXPD',
    'VMAXCPSZ128rm': 'VMAXPS',
    'VMAXCSDZrm': 'VMAXSD',
    'VMAXCSSZrm': 'VMAXSS',
    'VMINCPDZ128rm': 'VMINPD',
    'VMINCPSZ128rm': 'VMINPS',
    'VMINCSDZrm': 'VMINSD',
    'VMINCSSZrm': 'VMINSS',
    'VMOV64toPQIZrm': 'VMOVQ',
    'VPERMIL2PDYrr_REV': 'VPERMILPD',
    'VPERMIL2PSYrr_REV': 'VPERMILPS',
    'VCVTSD2SI64Zrm_Int': 'VCVTSD2SI',
    'VCVTSD2SSrm_Int': 'VCVTSD2SS',
    'VCVTSS2SI64Zrm_Int': 'VCVTSS2SI',
    'VCVTTSD2SI64Zrm_Int': 'VCVTTSD2SI',
    'VCVTTSS2SI64Zrm_Int': 'VCVTTSS2SI',
}

# FMA-style id prefixes. Order matters: 'VFMSUBADD' and 'VFMADDSUB' must be
# tried before the shorter 'VFMSUB' and 'VFMADD' they start with.
FMA_PREFIXES = (
    'VFMSUBADD',
    'VFMADDSUB',
    'VFMADD',
    'VFMSUB',
    'VFNMADD',
    'VFNMSUB',
)


def fma_mnemonic(insn):
    """Return the mnemonic for an id starting with one of FMA_PREFIXES.

    The mnemonic is the prefix plus the next 5 characters when the prefix
    is followed by a digit (e.g. 'VFMADD' + '123PD'), otherwise the prefix
    plus the next 2 characters (e.g. 'VFMADD' + 'PD'). Returns None when
    `insn` starts with none of the prefixes.
    """
    for prefix in FMA_PREFIXES:
        if insn.startswith(prefix):
            # slicing (not indexing) so an id equal to the bare prefix does
            # not raise IndexError
            has_digits = insn[len(prefix):len(prefix) + 1].isdigit()
            return insn[:len(prefix) + (5 if has_digits else 2)]
    return None


enum_count = 0
meet_insn = False

with open(sys.argv[2]) as f:
    lines = f.readlines()


count = 0           # number of entries emitted so far
last_mnem = None    # most recent mnemonic that was chosen with confidence
arch_name = arch.upper()

# only the content of the 1st 'enum {' block is processed, up to
# INSTRUCTION_LIST_END
for line in lines:
    line = line.strip()
    if not line:
        continue

    if line == 'enum {':
        enum_count += 1
        continue

    if enum_count != 1:
        continue

    if 'INSTRUCTION_LIST_END' in line:
        break

    insn = None
    if meet_insn:
        # enum items
        insn = line.split('=')[0].strip()
        if 'CALLSTACK' in insn or 'TAILJUMP' in insn:
            # pseudo instruction
            insn = None
    elif line.startswith(first_insn):
        # everything before the first instruction of the matcher table is
        # skipped
        insn = line.split('=')[0].strip()
        meet_insn = True

    if not insn:
        continue

    count += 1

    # pick the mnemonic: hand-pinned ids first, then the FMA prefixes, then
    # the mnemonic recorded in the matcher table
    mnem = SPECIAL_MNEMONICS.get(insn)
    if mnem is None:
        mnem = fma_mnemonic(insn)
    if mnem is None and insn in insn_id_list:
        mnem = insn_id_list[insn].upper()

    if mnem is None:
        # the last option when we cannot find mnem: use the last good mnem
        print_entry(arch_name, insn, last_mnem, mapping, True)
    else:
        last_mnem = mnem
        print_entry(arch_name, insn, mnem, mapping, False)
333