1#!/usr/bin/python
2# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
3# by Nguyen Anh Quynh, 2019
4
5import sys
6
# All three input files are mandatory: argv[3] is read right below, argv[1]
# and argv[2] further down, so show the usage text whenever any is missing
# instead of failing later with an IndexError.
if len(sys.argv) < 4:
    print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> <MappingInsnOp.inc>" %sys.argv[0])
    sys.exit(1)

# Lines of the existing mapping file; print_entry() searches them for the
# operand payload of instructions that were already mapped.
with open(sys.argv[3]) as f:
    mapping = f.readlines()

# Banner that opens the generated output.
print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
""")
19
20# lib/Target/X86/X86GenAsmMatcher.inc
21# static const MatchEntry MatchTable1[] = {
22#  { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },
23
24# extract insn from GenAsmMatcher Table
25# return (arch, mnem, insn_id)
def extract_insn(line):
    """Split one MatchTable entry into (arch, mnemonic, instruction id).

    `line` is a single table row that has already been stripped of
    surrounding whitespace, for example:

        { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },

    which yields ('X86', 'aaa', 'AAA').  A dotted mnemonic such as the X86
    "mov.s" is cut at the first dot.  Raises IndexError when the row does not
    have this shape.
    """
    fields = line.split(',')

    # Second comma-separated field is the qualified id, "<arch>::<insn id>".
    qualified_id = fields[1].strip().split('::')

    # In the first field ("{ 0 /* aaa */") the mnemonic is the 4th
    # space-separated token; keep only what precedes a '.' if there is one.
    mnemonic = fields[0].split(' ')[3].split('.')[0]

    return (qualified_id[0], mnemonic, qualified_id[1])
40
41
42# extract all insn lines from GenAsmMatcher
43# return arch, first_insn, insn_id_list
def extract_matcher(filename):
    """Collect the instruction entries of one MatchTable in GenAsmMatcher.inc.

    Table selection: MatchTable0 is remembered when its opening line is seen.
    If a line containing 'AArch64::' follows, the search stops there and
    MatchTable0 is used; otherwise a later MatchTable1 opening line takes
    over.  Rows of the selected table are parsed with extract_insn() until
    the closing '};'.

    Returns a tuple (arch, first_insn, insn_id_list):
        arch         -- arch of the first non-pseudo row, or None if none
        first_insn   -- instruction id of that row, or None
        insn_id_list -- dict mapping instruction id -> mnemonic; the first
                        mnemonic seen for an id wins

    Rows whose mnemonic starts with '__' are treated as pseudo instructions
    and skipped.  Raises ValueError when the file contains neither table.
    """
    with open(filename) as f:
        lines = f.readlines()

    match_count = 0
    insn_id_list = {}
    arch = None
    first_insn = None

    pattern = None
    # first we try to find Table1, or Table0
    for line in lines:
        if 'MatchEntry MatchTable0[] = {' in line:
            pattern = 'MatchEntry MatchTable0[] = {'
        elif 'AArch64::' in line and pattern:
            # We do not care about Apple Assembly
            break
        elif 'MatchEntry MatchTable1[] = {' in line:
            pattern = 'MatchEntry MatchTable1[] = {'
            # last pattern, done
            break

    if pattern is None:
        # Without this guard the membership test below fails with an opaque
        # TypeError on the first non-blank line.
        raise ValueError("no MatchTable0/MatchTable1 found in %s" % filename)

    for line in lines:
        line = line.strip()

        # skip empty line
        if not line:
            continue

        # opening line of the selected table
        if pattern in line:
            match_count += 1
            continue

        # rows are only parsed while inside the first occurrence of the table
        if match_count != 1:
            continue

        if line == '};':
            # done with first enum
            break

        _arch, mnem, insn_id = extract_insn(line)
        # skip pseudo instructions
        if mnem.startswith('__'):
            continue

        if not first_insn:
            arch, first_insn = _arch, insn_id
        # keep the mnemonic of the first row seen for this id
        insn_id_list.setdefault(insn_id, mnem)

    return arch, first_insn, insn_id_list
96
97
# Parse the matcher table named by the first command-line argument.
# extract_matcher() returns (arch, first_insn, insn_id_list), where
# insn_id_list maps an instruction id to its mnemonic.
arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
104
# Samples of the entries found in the mapping file.
#
# Header followed by two payload lines (any architecture other than ARM):
#{ /* X86_AAA, X86_INS_AAA: aaa */
#  X86_EFLAGS_UNDEFINED_OF | X86_EFLAGS_UNDEFINED_SF | X86_EFLAGS_UNDEFINED_ZF | X86_EFLAGS_MODIFY_AF | X86_EFLAGS_UNDEFINED_PF | X86_EFLAGS_MODIFY_CF,
#  { 0 }
#},

# Header followed by one payload line (ARM):
#{       /* ARM_ADCri, ARM_INS_ADC: adc${s}${p}  $rd, $rn, $imm */
#        { CS_AC_WRITE, CS_AC_READ, 0 }
#},
113
def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print one instruction entry of the regenerated mapping table.

    The payload lines are copied from the previous table (`mapping`) when it
    already holds an entry for the same instruction; otherwise an all-zero
    placeholder is printed.

    Args:
        arch: architecture prefix as returned by extract_matcher(), e.g.
            'X86', 'ARM' or 'AArch64'.
        insn_id: instruction id without the architecture prefix.
        mnem: mnemonic used for the `<ARCH>_INS_<mnem>` label (its lower-case
            form is printed after the colon).  Ignored when
            `mnem_can_be_wrong` is true.
        mapping: list of lines of the previous mapping file.
        mnem_can_be_wrong: when true, `mnem` is only a guess, so the comment
            text of the old entry is reused instead, and nothing at all is
            printed if the old table has no entry for this instruction.

    Raises IndexError if a matching header sits too close to the end of
    `mapping` for its payload lines to exist.
    """
    insn = "%s_%s" % (arch, insn_id)
    is_arm = arch.upper() == 'ARM'
    # The label prefix is ARM64 for AArch64 (ARM64_INS_*), while the lookup
    # key above keeps the original spelling (AArch64_*).
    ins_prefix = 'ARM64' if arch.upper() == 'AARCH64' else arch
    # An ARM entry has one payload line after its header, any other
    # architecture has two.
    payload_len = 1 if is_arm else 2

    def emit(header, payload):
        # Blank line, header, tab-indented payload lines, closing "},".
        print('\n' + '\n'.join([header] + ['\t' + p for p in payload] + ['},']))

    # first, try to find this entry in old MappingInsn.inc file
    for i, entry in enumerate(mapping):
        if not entry.startswith('{') or '/*' not in entry:
            continue

        # Header shape: "{ /* <insn>, <label>: <asm text> */".  Split only at
        # the first comma so commas inside the asm text survive.
        old_id, _, old_comment = entry.split('/*')[1].strip().partition(',')
        if old_id.strip() != insn:
            continue

        payload = [mapping[i + k].strip() for k in range(1, payload_len + 1)]
        if mnem_can_be_wrong:
            # Reuse the old comment text; it is expected to carry its own
            # closing "*/".
            emit('{\t/* %s, %s' % (insn, old_comment.strip()), payload)
        else:
            emit('{\t/* %s, %s_INS_%s: %s */'
                 % (insn, ins_prefix, mnem, mnem.lower()), payload)
        return

    if mnem_can_be_wrong:
        # No old entry and no trustworthy mnemonic: emit nothing.
        return

    # this insn does not exist in mapping table
    placeholder = ['{ 0 }'] if is_arm else ['0,', '{ 0 }']
    emit('{\t/* %s, %s_INS_%s: %s */'
         % (insn, ins_prefix, mnem, mnem.lower()), placeholder)
172
173
# extract from GenInstrInfo.inc, because the insn id is in order

# Number of 'enum {' lines seen so far while scanning GenInstrInfo.inc.
enum_count = 0
# Not read by any active code in this script.
meet_insn = False

# Whole content of GenInstrInfo.inc (second command-line argument).
with open(sys.argv[2]) as f:
    lines = f.readlines()


# Not read by any active code in this script.
count = 0
# Mnemonic of the most recent instruction that was found in the matcher
# table; passed along as a best guess for instructions that are not in it.
last_mnem = None
185
186
def is_pseudo_insn(insn, lines):
    """Tell whether `insn` is a pseudo instruction; currently always False.

    Detection is switched off, so no instruction is ever skipped on this
    basis.  Both parameters are accepted for interface compatibility and are
    ignored.  (Re-enabling it would mean scanning `lines` for a line that
    contains both '= <insn>' and 'MCID::Pseudo'.)
    """
    return False
194
195
# Walk GenInstrInfo.inc and print one entry per member of its first
# 'enum {' block, stopping at INSTRUCTION_LIST_END.
for line in lines:
    line = line.strip()

    if not line:
        continue

    if line == 'enum {':
        enum_count += 1
        continue

    # only the first enum is of interest
    if enum_count != 1:
        continue

    # skip pseudo instructions
    if '__' in line or 'setjmp' in line or 'longjmp' in line or 'Pseudo' in line:
        continue
    if 'INSTRUCTION_LIST_END' in line:
        break

    # enum members look like "<insn> = <value>,"
    insn = line.split('=')[0].strip()

    # skip more pseudo instruction
    if is_pseudo_insn(insn, lines):
        continue

    if insn in insn_id_list:
        # mnemonic known from the matcher table: print a fresh label
        last_mnem = insn_id_list[insn].upper()
        print_entry(arch, insn, last_mnem, mapping, False)
    else:
        # the last option when we cannot find mnem: use the last good mnem
        # (print_entry then reuses the old entry's comment, or prints nothing)
        print_entry(arch, insn, last_mnem, mapping, True)
380