#!/usr/bin/python
# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
# by Nguyen Anh Quynh, 2019

import sys


USAGE = "Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> <MappingInsnOp.inc>"

BANNER = """/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
"""

# Table headers searched for in GenAsmMatcher.inc.
MATCH_TABLE0 = 'MatchEntry MatchTable0[] = {'
MATCH_TABLE1 = 'MatchEntry MatchTable1[] = {'


def _read_lines(path):
    """Return all lines of the text file at path, closing it even on error."""
    with open(path) as handle:
        return handle.readlines()


# lib/Target/X86/X86GenAsmMatcher.inc
# static const MatchEntry MatchTable1[] = {
#   { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },

def extract_insn(line):
    """Extract (arch, mnem, insn_id) from one stripped MatchTable entry line.

    For '{ 0 /* aaa */, X86::AAA, ...' this returns ('X86', 'aaa', 'AAA').
    The mnemonic is the 4th space-separated token of the first comma field,
    so the line must not carry leading whitespace.
    """
    fields = line.split(',')
    insn_raw = fields[1].strip()
    insn_mnem = fields[0].split(' ')[3]
    # X86 mov.s: keep only the part before the first dot
    if '.' in insn_mnem:
        insn_mnem = insn_mnem.split('.')[0]
    qualified = insn_raw.split('::')
    return (qualified[0], insn_mnem, qualified[1])


def _select_table_pattern(lines):
    """Pick which MatchTable header to parse; None when neither is present.

    MatchTable1 wins when it is reached. MatchTable0 is kept when an
    'AArch64::' line follows it first (we do not care about Apple Assembly),
    or when MatchTable1 never appears.
    """
    pattern = None
    for line in lines:
        if MATCH_TABLE0 in line.strip():
            pattern = MATCH_TABLE0
        elif 'AArch64::' in line and pattern:
            break
        elif MATCH_TABLE1 in line.strip():
            # last pattern, done
            pattern = MATCH_TABLE1
            break
    return pattern


def extract_matcher(filename):
    """Collect the instruction lines of the chosen table in GenAsmMatcher.inc.

    Returns (arch, first_insn, insn_id_list): arch and first_insn come from
    the first non-pseudo entry (both None if there is none), and insn_id_list
    maps each instruction id to the mnemonic of its first occurrence.

    Raises ValueError when the file contains neither MatchTable0 nor
    MatchTable1; previously this surfaced as an opaque TypeError.
    """
    lines = _read_lines(filename)

    pattern = _select_table_pattern(lines)
    if pattern is None:
        raise ValueError("no MatchEntry MatchTable0/MatchTable1 found in %s" % filename)

    match_count = 0
    insn_id_list = {}
    arch = None
    first_insn = None

    for line in lines:
        line = line.strip()

        # skip empty line
        if not line:
            continue

        if pattern in line:
            match_count += 1
            continue

        # only the body of the first occurrence of the table is parsed
        if match_count != 1:
            continue

        if line == '};':
            # done with first table
            break

        _arch, mnem, insn_id = extract_insn(line)
        # skip pseudo instructions
        if mnem.startswith('__'):
            continue
        if not first_insn:
            arch, first_insn = _arch, insn_id
        if insn_id not in insn_id_list:
            # save this: the first mnemonic seen for an id wins
            insn_id_list[insn_id] = mnem

    return arch, first_insn, insn_id_list


#{  /* X86_AAA, X86_INS_AAA: aaa */
#   X86_EFLAGS_UNDEFINED_OF | X86_EFLAGS_UNDEFINED_SF | X86_EFLAGS_UNDEFINED_ZF | X86_EFLAGS_MODIFY_AF | X86_EFLAGS_UNDEFINED_PF | X86_EFLAGS_MODIFY_CF,
#   { 0 }
#},

#{  /* ARM_ADCri, ARM_INS_ADC: adc${s}${p} $rd, $rn, $imm */
#   { CS_AC_WRITE, CS_AC_READ, 0 }
#},

def _emit_entry(header, payload):
    """Print one table entry: blank line, header, tab-indented payload, '},'."""
    body = ''.join('\t%s\n' % item for item in payload)
    print('\n%s\n%s},' % (header, body))


def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print the mapping entry for one instruction to stdout.

    arch               -- architecture prefix, e.g. 'X86' or 'ARM'
    insn_id            -- LLVM instruction id, e.g. 'AAA'
    mnem               -- mnemonic for the <arch>_INS_<mnem> comment; only
                          used when mnem_can_be_wrong is false
    mapping            -- lines of the old mapping file
    mnem_can_be_wrong  -- when true, mnem is only a guess: an existing entry
                          is re-emitted with its old comment text, and
                          nothing is printed if no old entry exists

    ARM entries carry one payload line; every other arch carries two.
    When the instruction is absent from mapping (and the mnemonic is
    trusted), a zeroed entry is printed.
    """
    insn = "%s_%s" % (arch, insn_id)
    is_arm = arch.upper() == 'ARM'
    payload_len = 1 if is_arm else 2
    # NOTE(review): the original computed an 'ARM64' alias for AARCH64 but
    # only used it in the ARM branch, where it equals arch. Confirm whether
    # AArch64 entries were meant to be emitted as ARM64_INS_*.

    # first, try to find this entry in old MappingInsn.inc file
    for i, entry in enumerate(mapping):
        if not (entry.startswith('{') and '/*' in entry):
            continue
        comment = entry.split('/*')[1].strip()
        # Split on the first comma only, so commas inside the operand text
        # (e.g. "$rd, $rn, $imm") survive when the old comment is reused.
        old_insn, _, old_tail = comment.partition(',')
        if old_insn.strip() != insn:
            continue

        if mnem_can_be_wrong:
            header = '{\t/* %s, %s' % (insn, old_tail)
        else:
            header = '{\t/* %s, %s_INS_%s: %s */' % (insn, arch, mnem, mnem.lower())
        payload = [mapping[i + k].strip() for k in range(1, payload_len + 1)]
        _emit_entry(header, payload)
        return

    if mnem_can_be_wrong:
        # no old entry and no trustworthy mnemonic: emit nothing
        return

    # this insn does not exist in mapping table
    header = '{\t/* %s, %s_INS_%s: %s */' % (insn, arch, mnem, mnem.lower())
    _emit_entry(header, ['{ 0 }'] if is_arm else ['0,', '{ 0 }'])


def is_pseudo_insn(insn, lines):
    """Report whether insn is a pseudo instruction; currently always False.

    The check is disabled. The original body returned False ahead of a scan
    that would have returned True for any line containing both '= <insn>'
    and 'MCID::Pseudo'; that unreachable scan has been dropped.
    """
    return False


def main():
    """Generate MappingInsn.inc on stdout from the three input files."""
    # all three input paths are required
    if len(sys.argv) < 4:
        print(USAGE % sys.argv[0])
        sys.exit(1)

    mapping = _read_lines(sys.argv[3])

    print(BANNER)

    arch, _first_insn, insn_id_list = extract_matcher(sys.argv[1])

    # extract from GenInstrInfo.inc, because the insn id is in order
    lines = _read_lines(sys.argv[2])

    enum_count = 0
    last_mnem = None

    for line in lines:
        line = line.strip()

        if not line:
            continue

        if line == 'enum {':
            enum_count += 1
            continue

        # only the body of the first 'enum {' is processed
        if enum_count != 1:
            continue

        # skip pseudo instructions
        if '__' in line or 'setjmp' in line or 'longjmp' in line or 'Pseudo' in line:
            continue
        if 'INSTRUCTION_LIST_END' in line:
            break

        insn = line.split('=')[0].strip()

        # skip more pseudo instruction
        if is_pseudo_insn(insn, lines):
            continue

        if insn in insn_id_list:
            # trust old mapping table
            last_mnem = insn_id_list[insn].upper()
            print_entry(arch, insn, last_mnem, mapping, False)
        else:
            # the last option when we cannot find mnem: use the last good mnem
            print_entry(arch, insn, last_mnem, mapping, True)


if __name__ == "__main__":
    main()