1#!/usr/bin/python
2# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
3# by Nguyen Anh Quynh, 2019
4
5import sys
6
# All three positional arguments are required: sys.argv[3] is read just below
# and sys.argv[1] / sys.argv[2] are read further down, so anything shorter
# must stop here with the usage message instead of an IndexError.
if len(sys.argv) < 4:
    print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> MappingInsn.inc" %sys.argv[0])
    sys.exit(1)

# Lines of the existing MappingInsn.inc, kept as a list so that previously
# generated entries can be looked up by line position later on.
with open(sys.argv[3]) as f:
    mapping = f.readlines()

print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
""")
19
20# lib/Target/X86/X86GenAsmMatcher.inc
21# static const MatchEntry MatchTable1[] = {
22#  { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },
23
24# extract insn from GenAsmMatcher Table
25# return (arch, mnem, insn_id)
def extract_insn(line):
    """Split one MatchTable entry into (arch, mnemonic, instruction id).

    The mnemonic is the fourth space-separated token of the text before the
    first comma (the word inside the ``/* ... */`` comment of an entry such as
    ``{ 0 /* aaa */, X86::AAA, ... },``).  The second comma-separated field
    is expected to be ``<arch>::<insn_id>``.  The arch is returned exactly as
    spelled in the table; no renaming is done here.
    """
    fields = line.split(',')
    qualified_id = fields[1].strip()

    # Anything after the first dot is dropped (X86 "mov.s" becomes "mov").
    mnemonic = fields[0].split(' ')[3].split('.')[0]

    id_parts = qualified_id.split('::')
    return (id_parts[0], mnemonic, id_parts[1])
40
41
42# extract all insn lines from GenAsmMatcher
43# return arch, first_insn, insn_id_list
def extract_matcher(filename):
    """Collect instruction ids and mnemonics from one MatchTable of a GenAsmMatcher file.

    The table to read is chosen first: MatchTable1 when its header is present,
    otherwise MatchTable0.  The search for MatchTable1 is abandoned as soon as
    a line containing ``AArch64::`` is seen after the MatchTable0 header, so
    AArch64 input always uses MatchTable0.

    Entries whose mnemonic starts with ``__`` are skipped.  A trailing ``-``
    or ``+`` on a mnemonic is removed.  Only the first mnemonic seen for an
    instruction id is kept.

    Returns:
        (arch, first_insn, insn_id_list) where ``arch`` and ``first_insn``
        come from the first kept entry (both None if the table has no kept
        entry) and ``insn_id_list`` is a dict mapping instruction id to
        mnemonic.

    Raises:
        ValueError: if the file contains neither table header.
    """
    table0 = 'MatchEntry MatchTable0[] = {'
    table1 = 'MatchEntry MatchTable1[] = {'

    with open(filename) as f:
        lines = f.readlines()

    # Decide which table header to look for.
    pattern = None
    for line in lines:
        if table0 in line:
            pattern = table0
        elif 'AArch64::' in line and pattern:
            # We do not care about Apple Assembly
            break
        elif table1 in line:
            pattern = table1
            # last pattern, done
            break

    if pattern is None:
        # Without this check the scan below would fail with an opaque
        # TypeError on "None in <str>".
        raise ValueError("no MatchTable0/MatchTable1 definition found in %s" % filename)

    match_count = 0
    insn_id_list = {}
    arch = None
    first_insn = None

    for line in lines:
        line = line.strip()
        if not line:
            continue

        if pattern in line:
            match_count += 1
            continue

        # Only the body that follows the first occurrence of the header is parsed.
        if match_count != 1:
            continue

        if line == '};':
            # end of the table
            break

        _arch, mnem, insn_id = extract_insn(line)
        # skip pseudo instructions
        if mnem.startswith('__'):
            continue

        # PPC mnemonics may carry a trailing '-' or '+'
        if mnem.endswith(('-', '+')):
            mnem = mnem[:-1]

        if not first_insn:
            arch, first_insn = _arch, insn_id

        if insn_id not in insn_id_list:
            insn_id_list[insn_id] = mnem

    return arch, first_insn, insn_id_list
101
102
103#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1])
# Parse the assembler matcher table named by the first command-line argument.
# insn_id_list maps each instruction id found there to its mnemonic.
arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
105#arch = arch.upper()
106#print("first insn = %s" %first_insn)
107
108#for line in insn_id_list:
109#    print(line)
110
111
112#{
113#        X86_AAA, X86_INS_AAA,
114##ifndef CAPSTONE_DIET
115#        { 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0
116##endif
117#},
# Layout of one generated entry: (LLVM insn id, Capstone insn id, detail line).
_ENTRY_TEMPLATE = '''
{
\t%s, %s,
#ifndef CAPSTONE_DIET
\t%s
#endif
},'''


def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print the mapping-table entry for one instruction to stdout.

    ``mapping`` (lines of the old MappingInsn.inc) is searched for a line
    whose first comma-separated field equals ``<arch>_<insn_id>``.  When such
    a line exists, the detail line found two lines below it is reused.

    Args:
        arch: architecture prefix as spelled in the LLVM tables.
        insn_id: LLVM instruction id without the arch prefix.
        mnem: mnemonic used to build ``<ARCH>_INS_<mnem>``; may be None when
            ``mnem_can_be_wrong`` is true, since it is not used in that case.
        mapping: list of lines of the previous MappingInsn.inc.
        mnem_can_be_wrong: when true, ``mnem`` is not trusted: the Capstone id
            is copied from the old entry, and nothing is printed if there is
            no old entry.  When false, the Capstone id is built from ``mnem``
            and a default detail line is emitted if there is no old entry.
    """
    insn = "%s_%s" % (arch, insn_id)
    # Capstone ids use ARM64 where the LLVM tables say AArch64.
    arch1 = 'ARM64' if arch.upper() == 'AARCH64' else arch

    # first, try to find this entry in old MappingInsn.inc file
    for i, old_line in enumerate(mapping):
        fields = old_line.split(',')
        if fields[0].strip() != insn:
            continue

        if mnem_can_be_wrong:
            # ATTENTION: mnem can be wrong (or None), so keep the old id
            capstone_id = fields[1].strip()
        else:
            capstone_id = "%s_INS_%s" % (arch1, mnem)

        print(_ENTRY_TEMPLATE % (insn, capstone_id, mapping[i + 2].strip()))
        return

    if mnem_can_be_wrong:
        # no old entry and no trustworthy mnemonic: emit nothing
        return

    # this insn does not exist in mapping table
    print(_ENTRY_TEMPLATE % (insn, "%s_INS_%s" % (arch1, mnem), '{ 0 }, { 0 }, { 0 }, 0, 0'))
167
168
169# extract from GenInstrInfo.inc, because the insn id is in order
with open(sys.argv[2]) as f:
    lines = f.readlines()

# number of "enum {" lines seen so far; only the body of the first one is used
enum_count = 0
# most recent mnemonic taken from the matcher table; passed along as a guess
# for instructions that the matcher table does not list
last_mnem = None

for line in lines:
    line = line.strip()

    # skip empty line
    if not line:
        continue

    if line == 'enum {':
        enum_count += 1
        continue

    if enum_count != 1:
        continue

    # skip pseudo instructions
    if '__' in line or 'setjmp' in line or 'longjmp' in line or 'Pseudo' in line:
        continue

    if 'INSTRUCTION_LIST_END' in line:
        break

    # enum items look like "NAME = value,"; keep the name only
    insn = line.split('=')[0].strip()

    if insn in insn_id_list:
        # mnemonic comes from the matcher table
        last_mnem = insn_id_list[insn].upper()
        print_entry(arch, insn, last_mnem, mapping, False)
    else:
        # the last option when we cannot find mnem: use the last good mnem
        print_entry(arch, insn, last_mnem, mapping, True)
363