#!/usr/bin/python
# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
# by Nguyen Anh Quynh, 2019

import sys

_USAGE = "Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> MappingInsn.inc"

_HEADER = """/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
"""

# Table headers searched for in GenAsmMatcher.inc.
_MATCH_TABLE0 = 'MatchEntry MatchTable0[] = {'
_MATCH_TABLE1 = 'MatchEntry MatchTable1[] = {'

# Substrings that mark an enum line of GenInstrInfo.inc as a pseudo instruction.
_PSEUDO_MARKERS = ('__', 'setjmp', 'longjmp', 'Pseudo')

# One MappingInsn.inc entry: (LLVM insn id, Capstone insn id, detail line).
#{
#	X86_AAA, X86_INS_AAA,
##ifndef CAPSTONE_DIET
#	{ 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0
##endif
#},
_ENTRY_TEMPLATE = '''
{
\t%s, %s,
#ifndef CAPSTONE_DIET
\t%s
#endif
},'''

# Detail line emitted for instructions that are absent from the old mapping.
_DEFAULT_DETAILS = '{ 0 }, { 0 }, { 0 }, 0, 0'


# lib/Target/X86/X86GenAsmMatcher.inc
# static const MatchEntry MatchTable1[] = {
#   { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },

def extract_insn(line):
    """Parse one (already stripped) entry line of a GenAsmMatcher match table.

    The mnemonic is the 4th space-separated token of the first
    comma-separated field (the text inside the ``/* ... */`` comment);
    anything from the first '.' on is dropped (X86 ``mov.s`` -> ``mov``).
    The second comma-separated field is split on ``::`` into the
    architecture and the instruction id.

    Returns a tuple ``(arch, mnem, insn_id)``.
    Raises IndexError when the line does not have that shape.
    """
    fields = line.split(',')
    mnem = fields[0].split(' ')[3].split('.')[0]
    qualified = fields[1].strip().split('::')
    return (qualified[0], mnem, qualified[1])


def extract_matcher(filename):
    """Collect the instruction ids of the first match table in GenAsmMatcher.inc.

    Returns ``(arch, first_insn, insn_id_list)`` where ``insn_id_list`` maps
    each instruction id to the mnemonic of its first table entry, and
    ``arch`` / ``first_insn`` come from the first non-pseudo entry.
    Returns ``(None, None, {})`` when the file contains neither MatchTable0
    nor MatchTable1.
    """
    with open(filename) as f:
        lines = f.readlines()

    # First decide which table to read: MatchTable1 wins when it is reached,
    # otherwise MatchTable0; an 'AArch64::' line after MatchTable0 ends the
    # search early (we do not care about Apple Assembly).
    pattern = None
    for line in lines:
        if _MATCH_TABLE0 in line:
            pattern = _MATCH_TABLE0
        elif 'AArch64::' in line and pattern:
            break
        elif _MATCH_TABLE1 in line:
            pattern = _MATCH_TABLE1
            break

    insn_id_list = {}
    arch = None
    first_insn = None

    if pattern is None:
        # No table header at all: nothing to parse (and `None in str` would
        # raise TypeError below).
        return arch, first_insn, insn_id_list

    match_count = 0
    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        if pattern in line:
            match_count += 1
            continue

        # Entries are only read between the first table header and its '};'.
        if match_count != 1:
            continue
        if line == '};':
            break

        entry_arch, mnem, insn_id = extract_insn(line)
        # skip pseudo instructions
        if mnem.startswith('__'):
            continue
        # PPC: drop a trailing '-' / '+' from the mnemonic
        if mnem.endswith(('-', '+')):
            mnem = mnem[:-1]

        if not first_insn:
            arch, first_insn = entry_arch, insn_id

        # keep the mnemonic of the first entry seen for each instruction id
        insn_id_list.setdefault(insn_id, mnem)

    return arch, first_insn, insn_id_list


def _build_mapping_index(mapping):
    """Map the first comma-separated field of each line to its first line index."""
    index = {}
    for i, text in enumerate(mapping):
        index.setdefault(text.split(',')[0].strip(), i)
    return index


def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong, mapping_index=None):
    """Print one MappingInsn.inc entry for ``<arch>_<insn_id>`` to stdout.

    arch              -- architecture prefix; 'AARCH64' (any case) is written
                         as 'ARM64' in the generated ``*_INS_*`` id.
    insn_id           -- LLVM instruction enum name.
    mnem              -- mnemonic used to build ``<arch>_INS_<mnem>``; may be
                         None when ``mnem_can_be_wrong`` is true.
    mapping           -- lines of the old MappingInsn.inc file.
    mnem_can_be_wrong -- when true, ``mnem`` is only a guess: the Capstone id
                         is copied from the old mapping, and nothing is
                         printed if the instruction is not in the old mapping.
    mapping_index     -- optional result of ``_build_mapping_index(mapping)``;
                         pass it when calling repeatedly so the mapping is not
                         rescanned on every call.

    When the instruction is found in the old mapping, the line two below the
    matching line is reused as the detail line; otherwise an all-zero detail
    line is emitted.
    """
    insn = "%s_%s" % (arch, insn_id)
    arch1 = 'ARM64' if arch.upper() == 'AARCH64' else arch

    if mapping_index is None:
        mapping_index = _build_mapping_index(mapping)

    # first, try to find this entry in old MappingInsn.inc file
    found = mapping_index.get(insn)
    if found is not None:
        details = mapping[found + 2].strip()
        if mnem_can_be_wrong:
            # ATTENTION: mnem can be wrong, so trust the old Capstone id
            capstone_id = mapping[found].split(',')[1].strip()
        else:
            capstone_id = "%s_INS_%s" % (arch1, mnem)
        print(_ENTRY_TEMPLATE % (insn, capstone_id, details))
        return

    if mnem_can_be_wrong:
        # not in the old mapping and no trustworthy mnemonic: emit nothing
        return

    # this insn does not exist in mapping table
    print(_ENTRY_TEMPLATE % (insn, "%s_INS_%s" % (arch1, mnem), _DEFAULT_DETAILS))


def _iter_instruction_ids(lines):
    """Yield instruction names from the first ``enum {`` block of GenInstrInfo.inc.

    Lines containing a pseudo-instruction marker are skipped; iteration stops
    at the line containing INSTRUCTION_LIST_END.
    """
    enum_count = 0
    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        if line == 'enum {':
            enum_count += 1
            continue

        if enum_count != 1:
            continue

        # skip pseudo instructions
        if any(marker in line for marker in _PSEUDO_MARKERS):
            continue
        if 'INSTRUCTION_LIST_END' in line:
            return

        yield line.split('=')[0].strip()


def main(argv=None):
    """Generate MappingInsn.inc on stdout.

    argv -- ``[prog, GenAsmMatcher.inc, GenInstrInfo.inc, MappingInsn.inc]``;
            defaults to ``sys.argv``.

    Exits with status 1 after printing the usage line when fewer than three
    file arguments are given, or with an error message when no instruction
    could be extracted from the matcher file.
    """
    argv = sys.argv if argv is None else argv

    # all three file arguments are required
    if len(argv) < 4:
        print(_USAGE % argv[0])
        sys.exit(1)

    matcher_path, instr_info_path, mapping_path = argv[1], argv[2], argv[3]

    with open(mapping_path) as f:
        mapping = f.readlines()

    arch, _first_insn, insn_id_list = extract_matcher(matcher_path)
    if arch is None:
        sys.exit("error: no usable MatchTable0/MatchTable1 entries found in %s" % matcher_path)

    # extract from GenInstrInfo.inc, because the insn id is in order
    with open(instr_info_path) as f:
        lines = f.readlines()

    print(_HEADER)

    mapping_index = _build_mapping_index(mapping)
    last_mnem = None

    for insn in _iter_instruction_ids(lines):
        mnem = insn_id_list.get(insn)
        if mnem is not None:
            # trust old mapping table
            last_mnem = mnem.upper()
            print_entry(arch, insn, last_mnem, mapping, False, mapping_index)
        else:
            # the last option when we cannot find mnem: use the last good mnem
            print_entry(arch, insn, last_mnem, mapping, True, mapping_index)


if __name__ == "__main__":
    main()