1#!/usr/bin/python
2# print list of instructions LLVM inc files, for Capstone disassembler.
3# this will be put into capstone/<arch>.h
4# by Nguyen Anh Quynh, 2019
5
6import sys
7
# All three input files are mandatory: sys.argv[3] is read unconditionally
# right below, so anything short of three arguments is a usage error.
if len(sys.argv) < 4:
    print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> MappingInsn.inc" %sys.argv[0])
    sys.exit(1)

# MappingInsn.inc
# Kept as a list of raw lines; it is scanned later to recover the Capstone
# instruction name of entries whose mnemonic cannot be trusted.
with open(sys.argv[3]) as f:
    mapping = f.readlines()

# Banner that opens the generated output.
print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
""")
21
22# lib/Target/X86/X86GenAsmMatcher.inc
23# static const MatchEntry MatchTable1[] = {
24#  { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },
25
26# extract insn from GenAsmMatcher Table
27# return (arch, mnem, insn_id)
def extract_insn(line):
    """Split one MatchTable row into (arch, mnemonic, instruction id).

    `line` is a single table entry such as
        { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },
    Leading/trailing whitespace is ignored.

    Returns a tuple (arch, mnem, insn_id), e.g. ('X86', 'aaa', 'AAA').
    A dotted mnemonic (X86 'mov.s') is cut at the first dot, and the LLVM
    target name 'AArch64' is reported as 'ARM64'.

    Raises IndexError when the row does not have the expected layout.
    """
    fields = line.strip().split(',')

    # fields[0] looks like "{ 0 /* aaa */": the mnemonic is the 4th
    # space-separated token.
    insn_mnem = fields[0].split(' ')[3]
    # X86 mov.s -> mov
    insn_mnem = insn_mnem.split('.')[0]

    # fields[1] looks like " X86::AAA"
    qualified = fields[1].strip().split('::')
    arch = qualified[0]
    # AArch64 -> ARM64. Compare against the upper-cased spelling: the
    # comparison is made on arch.upper(), so a mixed-case literal can
    # never match.
    if arch.upper() == 'AARCH64':
        arch = 'ARM64'
    return (arch, insn_mnem, qualified[1])
42
43
44
# extract all insn lines from GenAsmMatcher
# return arch, first_insn, insn_id_list
def extract_matcher(filename):
    """Collect the instructions listed in a GenAsmMatcher.inc match table.

    The table used is MatchTable1 when the file declares one, otherwise
    MatchTable0. Rows are read from the table header up to the closing
    '};'. Rows whose mnemonic starts with '__' are ignored.

    Returns a tuple (arch, first_insn, insn_id_list):
      arch         -- architecture of the first accepted row, or None when
                      no row was accepted
      first_insn   -- instruction id of the first accepted row, or None
      insn_id_list -- dict mapping instruction id -> mnemonic; the first
                      mnemonic seen for an id wins

    Raises ValueError when the file declares neither table.
    """
    with open(filename) as f:
        lines = f.readlines()

    # Pick the table header to look for. Scanning stops at the first
    # MatchTable1 declaration, which takes precedence over MatchTable0.
    pattern = None
    for line in lines:
        if 'MatchEntry MatchTable0[] = {' in line:
            pattern = 'MatchEntry MatchTable0[] = {'
        elif 'MatchEntry MatchTable1[] = {' in line:
            pattern = 'MatchEntry MatchTable1[] = {'
            break

    if pattern is None:
        # Without this check the substring test below fails with an
        # opaque TypeError on the very first non-empty line.
        raise ValueError("no MatchTable0/MatchTable1 found in %s" % filename)

    insn_id_list = {}
    arch = None
    first_insn = None
    match_count = 0

    for line in lines:
        line = line.strip()
        if not line:
            continue

        if pattern in line:
            # table header: rows start on the next line
            match_count += 1
            continue

        if match_count != 1:
            # not inside the (first occurrence of the) selected table
            continue

        if line == '};':
            # end of the table
            break

        _arch, mnem, insn_id = extract_insn(line)
        if mnem.startswith('__'):
            continue
        if not first_insn:
            arch, first_insn = _arch, insn_id
        if insn_id not in insn_id_list:
            insn_id_list[insn_id] = mnem

    return arch, first_insn, insn_id_list
97
# GenAsmMatcher.inc
arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
if arch is None:
    # extract_matcher() leaves arch as None when the match table yielded no
    # usable row; stop with a readable message instead of failing on
    # None.upper() below. sys.exit() with a string reports it on stderr and
    # exits with status 1.
    sys.exit("error: no instruction entries found in %s" % sys.argv[1])
arch = arch.upper()
105
106
# Capstone instruction names already emitted, in output order; consulted to
# suppress duplicates.
insn_list = []


def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Emit one '<ARCH>_INS_<MNEM>,' line on stdout, at most once per name.

    arch              -- architecture name; upper-cased before use
    insn_id           -- LLVM instruction id (e.g. 'AAA')
    mnem              -- mnemonic to emit; only used when it can be trusted
    mapping           -- lines of MappingInsn.inc, each expected to look like
                         '<ARCH>_<insn_id>, <ARCH>_INS_<MNEM>, ...'
    mnem_can_be_wrong -- when false, the name is built from `mnem`; when
                         true, `mnem` is ignored and the name is taken from
                         the first `mapping` line whose first field equals
                         '<ARCH>_<insn_id>'. Nothing is emitted if no such
                         line exists.

    Every emitted name is appended to the module-level `insn_list`.
    """
    # Diagnostic trace. It goes to stderr so that stdout carries only the
    # generated instruction list.
    sys.stderr.write("%s %s %s %s\n" % (arch, insn_id, mnem, mnem_can_be_wrong))

    if not mnem_can_be_wrong:
        insn = "%s_INS_%s" %(arch.upper(), mnem.upper())
    else:
        # so mnem can be wrong, we need to verify with MappingInsn.inc
        key = "%s_%s" %(arch.upper(), insn_id)
        insn = None
        for entry in mapping:
            fields = entry.split(',')
            if fields[0].strip() == key:
                # only the first matching line is considered
                insn = fields[1].strip()
                break
        if insn is None:
            return

    if insn in insn_list:
        return
    print("%s," %insn)
    insn_list.append(insn)
137
138
# extract from GenInstrInfo.inc, because the insn id is in order

# Instruction ids whose mnemonic is fixed by hand, keyed by the exact id.
# NOTE(review): 'CMOVP_Fp3' can never equal 'CMOVP_Fp32'; it looks like a
# truncated id -- confirm against GenInstrInfo.inc before changing it.
_MNEM_OVERRIDES = {
    'BSWAP16r_BAD': 'BSWAP',
    'CMOVNP_Fp32': 'FCMOVNP',
    'CMOVP_Fp3': 'FCMOVP',
    'CMPSDrm_Int': 'CMPSD',
    'MOVSX16rm16': 'MOVSX',
    'MOVZX16rm16': 'MOVZX',
    'ST_Fp32m': 'FST',
    'CMOVNP_Fp64': 'FCMOVNU',
    'CMPSDrr_Int': 'CMPSD',
    'CMPSSrm_Int': 'CMPSS',
    'VCMPSDrm_Int': 'VCMPSD',
    'VCMPSSrm_Int': 'VCMPSS',
    'VPCMOVYrrr_REV': 'VPCMOV',
    'VRNDSCALESDZm': 'VRNDSCALESD',
    'VRNDSCALESSZm': 'VRNDSCALESS',
    'VMAXCPDZ128rm': 'VMAXPD',
    'VMAXCPSZ128rm': 'VMAXPS',
    'VMAXCSDZrm': 'VMAXSD',
    'VMAXCSSZrm': 'VMAXSS',
    'VMINCPDZ128rm': 'VMINPD',
    'VMINCPSZ128rm': 'VMINPS',
    'VMINCSDZrm': 'VMINSD',
    'VMINCSSZrm': 'VMINSS',
    'VMOV64toPQIZrm': 'VMOVQ',
    'VPERMIL2PDYrr_REV': 'VPERMILPD',
    'VPERMIL2PSYrr_REV': 'VPERMILPS',
    'VCVTSD2SI64Zrm_Int': 'VCVTSD2SI',
    'VCVTSD2SSrm_Int': 'VCVTSD2SS',
    'VCVTSS2SI64Zrm_Int': 'VCVTSS2SI',
    'VCVTTSD2SI64Zrm_Int': 'VCVTTSD2SI',
    'VCVTTSS2SI64Zrm_Int': 'VCVTTSS2SI',
}

# FMA families whose mnemonic is a fixed-length head of the instruction id.
# Order matters: the longer prefixes must be tried before 'VFMADD'/'VFMSUB',
# which are prefixes of 'VFMADDSUB'/'VFMSUBADD'.
_FMA_PREFIXES = (
    'VFMSUBADD',
    'VFMADDSUB',
    'VFMADD',
    'VFMSUB',
    'VFNMADD',
    'VFNMSUB',
)


def _resolve_mnem(insn):
    """Return the trusted mnemonic for instruction id `insn`, or None.

    Sources, in priority order: the hand-written override table, the FMA
    prefix rule, then the mnemonic recorded from the asm matcher table
    (upper-cased).
    """
    if insn in _MNEM_OVERRIDES:
        return _MNEM_OVERRIDES[insn]

    for prefix in _FMA_PREFIXES:
        if insn.startswith(prefix):
            # The mnemonic is the prefix, an optional 3-digit group (as in
            # 'VFMADD123PD') and two more characters (as in 'VFMADDPD').
            # Slicing instead of indexing keeps an id equal to the bare
            # prefix from raising IndexError.
            has_digits = insn[len(prefix):len(prefix) + 1].isdigit()
            return insn[:len(prefix) + (5 if has_digits else 2)]

    if insn in insn_id_list:
        # trust old mapping table
        return insn_id_list[insn].upper()

    return None


# GenInstrInfo.inc
with open(sys.argv[2]) as f:
    lines = f.readlines()

enum_count = 0
meet_insn = False
last_mnem = None

# Only the first 'enum {' block of the file is scanned, and within it only
# the items from first_insn up to INSTRUCTION_LIST_END.
for line in lines:
    line = line.strip()
    if not line:
        continue

    if line == 'enum {':
        enum_count += 1
        continue

    if enum_count != 1:
        continue

    if 'INSTRUCTION_LIST_END' in line:
        break

    if meet_insn:
        # enum items
        insn = line.split('=')[0].strip()
        if 'CALLSTACK' in insn or 'TAILJUMP' in insn:
            # pseudo instruction
            continue
    elif line.startswith(first_insn):
        # first real instruction: everything before it is skipped
        insn = line.split('=')[0].strip()
        meet_insn = True
    else:
        continue

    if not insn:
        continue

    mnem = _resolve_mnem(insn)
    if mnem is None:
        # the last option when we cannot find mnem: use the last good mnem
        print_entry(arch.upper(), insn, last_mnem, mapping, True)
    else:
        last_mnem = mnem
        print_entry(arch.upper(), insn, last_mnem, mapping, False)
325