#!/usr/bin/python
# print list of instructions LLVM inc files, for Capstone disassembler.
# this will be put into capstone/<arch>.h
# by Nguyen Anh Quynh, 2019

import sys

# Banner printed once at the top of the generated output.
_HEADER = """/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
"""

# Lines that open the matcher tables inside GenAsmMatcher.inc.
_MATCH_TABLE0 = 'MatchEntry MatchTable0[] = {'
_MATCH_TABLE1 = 'MatchEntry MatchTable1[] = {'

# Names already printed by print_entry(), in print order; used to skip
# duplicates across calls.
insn_list = []

# Instruction ids whose mnemonic is hard-coded instead of being taken from
# the matcher table.
_FIXED_MNEM = {
    'BSWAP16r_BAD': 'BSWAP',
    'CMOVNP_Fp32': 'FCMOVNP',
    # NOTE(review): 'CMOVP_Fp3' looks like it may be a typo for 'CMOVP_Fp32';
    # kept exactly as found -- confirm against GenInstrInfo.inc.
    'CMOVP_Fp3': 'FCMOVP',
    'CMPSDrm_Int': 'CMPSD',
    'MOVSX16rm16': 'MOVSX',
    'MOVZX16rm16': 'MOVZX',
    'ST_Fp32m': 'FST',
    # NOTE(review): maps to 'FCMOVNU' while CMOVNP_Fp32 maps to 'FCMOVNP';
    # kept exactly as found -- confirm this is intended.
    'CMOVNP_Fp64': 'FCMOVNU',
    'CMPSDrr_Int': 'CMPSD',
    'CMPSSrm_Int': 'CMPSS',
    'VCMPSDrm_Int': 'VCMPSD',
    'VCMPSSrm_Int': 'VCMPSS',
    'VPCMOVYrrr_REV': 'VPCMOV',
    'VRNDSCALESDZm': 'VRNDSCALESD',
    'VRNDSCALESSZm': 'VRNDSCALESS',
    'VMAXCPDZ128rm': 'VMAXPD',
    'VMAXCPSZ128rm': 'VMAXPS',
    'VMAXCSDZrm': 'VMAXSD',
    'VMAXCSSZrm': 'VMAXSS',
    'VMINCPDZ128rm': 'VMINPD',
    'VMINCPSZ128rm': 'VMINPS',
    'VMINCSDZrm': 'VMINSD',
    'VMINCSSZrm': 'VMINSS',
    'VMOV64toPQIZrm': 'VMOVQ',
    'VPERMIL2PDYrr_REV': 'VPERMILPD',
    'VPERMIL2PSYrr_REV': 'VPERMILPS',
    'VCVTSD2SI64Zrm_Int': 'VCVTSD2SI',
    'VCVTSD2SSrm_Int': 'VCVTSD2SS',
    'VCVTSS2SI64Zrm_Int': 'VCVTSS2SI',
    'VCVTTSD2SI64Zrm_Int': 'VCVTTSD2SI',
    'VCVTTSS2SI64Zrm_Int': 'VCVTTSS2SI',
}

# (prefix, mnemonic length when a digit follows the prefix, length otherwise).
# Order matters: the longer VFMSUBADD/VFMADDSUB prefixes must be tried before
# VFMSUB/VFMADD, which are prefixes of them.
_FMA_RULES = (
    ('VFMSUBADD', len('VFMSUBADD123xy'), len('VFMSUBADDSS')),
    ('VFMADDSUB', len('VFMADDSUB123xy'), len('VFMADDSUBSS')),
    ('VFMADD', len('VFMADD123PD'), len('VFMADDPD')),
    ('VFMSUB', len('VFMSUB123PD'), len('VFMSUBPD')),
    ('VFNMADD', len('VFNMADD123xy'), len('VFNMADDSS')),
    ('VFNMSUB', len('VFNMSUB123xy'), len('VFNMSUBSS')),
)


# lib/Target/X86/X86GenAsmMatcher.inc
# static const MatchEntry MatchTable1[] = {
#   { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },

# extract insn from GenAsmMatcher Table
# return (arch, mnem, insn_id)
def extract_insn(line):
    """Parse one (already stripped) match-table entry line.

    The mnemonic is the 4th space-separated token of the first comma field
    (the text inside the /* ... */ comment); the second comma field is
    '<arch>::<insn_id>'.

    Returns a tuple (arch, mnem, insn_id). Raises IndexError when the line
    does not have that shape.
    """
    fields = line.split(',')
    insn_raw = fields[1].strip()
    insn_mnem = fields[0].split(' ')[3]
    # X86 mov.s: keep only the part before the first dot
    if '.' in insn_mnem:
        insn_mnem = insn_mnem.split('.')[0]
    parts = insn_raw.split('::')
    arch = parts[0]
    # AArch64 -> ARM64 (compare upper-cased on both sides; the previous
    # mixed-case literal could never match)
    if arch.upper() == 'AARCH64':
        arch = 'ARM64'
    return (arch, insn_mnem, parts[1])


# extract all insn lines from GenAsmMatcher
# return arch, first_insn, insn_id_list
def extract_matcher(filename):
    """Collect instruction ids and mnemonics from a GenAsmMatcher.inc file.

    Only the entries of one match table are read: MatchTable1 when the file
    has it, otherwise MatchTable0. Entries whose mnemonic starts with '__'
    are skipped.

    Returns (arch, first_insn, insn_id_list) where insn_id_list maps
    insn_id -> mnemonic (first occurrence wins) and arch/first_insn come
    from the first accepted entry. Returns (None, None, {}) when no table
    or no usable entry is found.
    """
    with open(filename) as f:
        lines = f.readlines()

    # first we try to find Table1, or Table0
    pattern = None
    for line in lines:
        if _MATCH_TABLE0 in line:
            pattern = _MATCH_TABLE0
        elif _MATCH_TABLE1 in line:
            pattern = _MATCH_TABLE1
            # last pattern, done
            break

    insn_id_list = {}
    arch = None
    first_insn = None
    if pattern is None:
        # no match table at all: nothing to extract
        return arch, first_insn, insn_id_list

    match_count = 0
    for line in lines:
        line = line.strip()
        if not line:
            continue

        if pattern in line:
            match_count += 1
            continue

        if match_count != 1:
            # not inside the selected table
            continue

        if line == '};':
            # done with the table
            break

        _arch, mnem, insn_id = extract_insn(line)
        if mnem.startswith('__'):
            continue
        if not first_insn:
            arch, first_insn = _arch, insn_id
        if insn_id not in insn_id_list:
            insn_id_list[insn_id] = mnem

    return arch, first_insn, insn_id_list


#{
#	X86_AAA, X86_INS_AAA,
##ifndef CAPSTONE_DIET
#	{ 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0
##endif
#},
def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print one '<NAME>,' line for an instruction, skipping duplicates.

    arch: architecture name, used upper-cased as the name prefix.
    insn_id: instruction id from GenInstrInfo.inc.
    mnem: mnemonic; only used for the name when mnem_can_be_wrong is false.
    mapping: lines of the old MappingInsn.inc file.
    mnem_can_be_wrong: when true, the name is taken from the second comma
        field of the first mapping line whose first field is
        '<ARCH>_<insn_id>'; nothing is printed when no such line exists.

    Every printed name is appended to the module-level insn_list.
    """
    # debug trace goes to stderr so it cannot end up in the generated list
    sys.stderr.write("%s %s %s %s\n" % (arch, insn_id, mnem, mnem_can_be_wrong))

    if not mnem_can_be_wrong:
        insn = "%s_INS_%s" %(arch.upper(), mnem.upper())
        if insn in insn_list:
            return
        print("%s," %insn)
        insn_list.append(insn)
        return

    insn = "%s_%s" %(arch.upper(), insn_id)
    # so mnem can be wrong, we need to verify with MappingInsn.inc
    # first, try to find this entry in old MappingInsn.inc file
    for entry in mapping:
        fields = entry.split(',')
        if fields[0].strip() == insn:
            insn = fields[1].strip()
            if insn in insn_list:
                return
            print("%s," %insn)
            insn_list.append(insn)
            return


def _guess_mnem(insn, insn_id_list):
    """Return the mnemonic for insn, or None when no rule applies.

    Rules are tried in order: hard-coded table, FMA prefix rules, then the
    mnemonic recorded in the matcher table (upper-cased).
    """
    if insn in _FIXED_MNEM:
        return _FIXED_MNEM[insn]

    for prefix, numbered_len, plain_len in _FMA_RULES:
        if insn.startswith(prefix):
            # slice instead of index so a bare prefix does not raise
            if insn[len(prefix):len(prefix) + 1].isdigit():
                return insn[:numbered_len]
            return insn[:plain_len]

    if insn in insn_id_list:
        # trust old mapping table
        return insn_id_list[insn].upper()

    return None


def _print_insn_list(lines, arch, first_insn, insn_id_list, mapping):
    """Walk the first 'enum {' body of GenInstrInfo.inc and print entries.

    Items are ignored until a line starting with first_insn is met; after
    that every item is handled except those containing 'CALLSTACK' or
    'TAILJUMP'. Stops at the line containing 'INSTRUCTION_LIST_END'.
    """
    enum_count = 0
    meet_insn = False
    last_mnem = None

    for line in lines:
        line = line.strip()
        if not line:
            continue

        if line == 'enum {':
            enum_count += 1
            continue

        # only the first enum in the file is scanned
        if enum_count != 1:
            continue

        if 'INSTRUCTION_LIST_END' in line:
            break

        insn = None
        if meet_insn:
            # enum items
            insn = line.split('=')[0].strip()
            if 'CALLSTACK' in insn or 'TAILJUMP' in insn:
                # pseudo instruction
                insn = None
        elif line.startswith(first_insn):
            insn = line.split('=')[0].strip()
            meet_insn = True

        if not insn:
            continue

        mnem = _guess_mnem(insn, insn_id_list)
        if mnem is None:
            # the last option when we cannot find mnem: use the last good mnem
            print_entry(arch, insn, last_mnem, mapping, True)
        else:
            last_mnem = mnem
            print_entry(arch, insn, last_mnem, mapping, False)


def main():
    """Script entry point: read the three .inc files and print the list."""
    # all three file arguments are required
    if len(sys.argv) < 4:
        print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> MappingInsn.inc" %sys.argv[0])
        sys.exit(1)

    # MappingInsn.inc
    with open(sys.argv[3]) as f:
        mapping = f.readlines()

    print(_HEADER)

    # GenAsmMatcher.inc
    arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
    if arch is None or first_insn is None:
        sys.exit("Error: no instruction found in a match table of %s" % sys.argv[1])
    arch = arch.upper()

    # GenInstrInfo.inc: extract from here, because the insn id is in order
    with open(sys.argv[2]) as f:
        lines = f.readlines()

    _print_insn_list(lines, arch, first_insn, insn_id_list, mapping)


if __name__ == '__main__':
    main()