#!/usr/bin/python
# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
# by Nguyen Anh Quynh, 2019

import sys

# Banner emitted at the top of the generated MappingInsn.inc.
FILE_HEADER = """/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
"""

# Layout of one generated entry:
#{
#	X86_AAA, X86_INS_AAA, 0,
##ifndef CAPSTONE_DIET
#	{ 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0
##endif
#},
# Fields: arch, insn_id, mapped instruction name, is64bit flag, detail line.
ENTRY_TEMPLATE = '''
{
\t%s_%s, %s, %s,
#ifndef CAPSTONE_DIET
\t%s
#endif
},'''

# Detail line used when the instruction is absent from the old MappingInsn.inc.
DEFAULT_DETAIL = '{ 0 }, { 0 }, { 0 }, 0, 0'

# Instruction ids whose mnemonic is hard-coded instead of being looked up
# in the GenAsmMatcher table.
# NOTE(review): 'CMOVP_Fp3' looks like it may be a typo for 'CMOVP_Fp32';
# kept unchanged so the generated output stays identical - confirm.
MNEMONIC_OVERRIDES = {
    'BSWAP16r_BAD': 'BSWAP',
    'CMOVNP_Fp32': 'FCMOVNP',
    'CMOVP_Fp3': 'FCMOVP',
    'CMPSDrm_Int': 'CMPSD',
    'MOVSX16rm16': 'MOVSX',
    'MOVZX16rm16': 'MOVZX',
    'ST_Fp32m': 'FST',
    'CMOVNP_Fp64': 'FCMOVNU',
    'CMPSDrr_Int': 'CMPSD',
    'CMPSSrm_Int': 'CMPSS',
    'VCMPSDrm_Int': 'VCMPSD',
    'VCMPSSrm_Int': 'VCMPSS',
    'VPCMOVYrrr_REV': 'VPCMOV',
    'VRNDSCALESDZm': 'VRNDSCALESD',
    'VRNDSCALESSZm': 'VRNDSCALESS',
    'VMAXCPDZ128rm': 'VMAXPD',
    'VMAXCPSZ128rm': 'VMAXPS',
    'VMAXCSDZrm': 'VMAXSD',
    'VMAXCSSZrm': 'VMAXSS',
    'VMINCPDZ128rm': 'VMINPD',
    'VMINCPSZ128rm': 'VMINPS',
    'VMINCSDZrm': 'VMINSD',
    'VMINCSSZrm': 'VMINSS',
    'VMOV64toPQIZrm': 'VMOVQ',
    'VPERMIL2PDYrr_REV': 'VPERMILPD',
    'VPERMIL2PSYrr_REV': 'VPERMILPS',
    'VCVTSD2SI64Zrm_Int': 'VCVTSD2SI',
    'VCVTSD2SSrm_Int': 'VCVTSD2SS',
    'VCVTSS2SI64Zrm_Int': 'VCVTSS2SI',
    'VCVTTSD2SI64Zrm_Int': 'VCVTTSD2SI',
    'VCVTTSS2SI64Zrm_Int': 'VCVTTSS2SI',
}

# Prefixes whose mnemonic is a fixed-length slice of the instruction id.
# Order matters: the longer prefixes must be tried before 'VFMADD'/'VFMSUB',
# which they also start with.
FMA_PREFIXES = (
    'VFMSUBADD',
    'VFMADDSUB',
    'VFMADD',
    'VFMSUB',
    'VFNMADD',
    'VFNMSUB',
)


# lib/Target/X86/X86GenAsmMatcher.inc
# static const MatchEntry MatchTable1[] = {
#   { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },

def extract_insn(line):
    """Parse one (already stripped) MatchTable1 row.

    Returns a tuple (arch, mnem, insn_id).  The mnemonic is the 4th
    space-separated token of the first comma field, cut at the first '.'
    (X86 "mov.s" -> "mov").  An arch of "AArch64" (any case) is reported
    as "ARM64".

    Raises IndexError if the row does not have the expected shape.
    """
    fields = line.split(',')
    insn_raw = fields[1].strip()
    # X86 mov.s: keep only the part before the first dot
    insn_mnem = fields[0].split(' ')[3].split('.')[0]
    parts = insn_raw.split('::')
    arch = parts[0]
    # AArch64 -> ARM64
    if arch.upper() == 'AARCH64':
        arch = 'ARM64'
    return (arch, insn_mnem, parts[1])


def extract_matcher(filename):
    """Collect the instructions listed in MatchTable1 of a GenAsmMatcher file.

    Returns (arch, first_insn, insn_id_list) where arch and first_insn come
    from the first row of the table and insn_id_list maps each instruction
    id to the mnemonic of its first occurrence.  arch and first_insn are
    None when the file contains no MatchTable1 rows.
    """
    match_count = 0
    insn_id_list = {}
    arch = None
    first_insn = None

    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            if 'MatchEntry MatchTable1[] = {' in line:
                match_count += 1
                continue

            # only rows between the first table header and its '};' count
            if match_count != 1:
                continue
            if line == '};':
                # done with the table
                break

            _arch, mnem, insn_id = extract_insn(line)
            if first_insn is None:
                arch, first_insn = _arch, insn_id
            if insn_id not in insn_id_list:
                insn_id_list[insn_id] = mnem

    return arch, first_insn, insn_id_list


def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print one MappingInsn.inc entry for insn_id to stdout.

    mapping is the list of lines of the old MappingInsn.inc.  If a line
    whose first comma field equals "<ARCH>_<insn_id>" exists, the detail
    line is copied from two lines below it; otherwise an all-zero detail
    line is emitted.  When mnem_can_be_wrong is true and the old entry
    exists, the old mapped instruction name is reused instead of
    "<arch>_INS_<mnem>".  The is64bit flag is '1' when insn_id contains
    "64", else '0'.

    Raises IndexError if the matching line is one of the last two lines of
    mapping, or (with mnem_can_be_wrong) has no second comma field.
    """
    insn = "%s_%s" % (arch.upper(), insn_id)
    is64bit = '1' if '64' in insn_id else '0'
    guessed = "%s_INS_%s" % (arch, mnem)

    # first, try to find this entry in old MappingInsn.inc file
    for i, old_line in enumerate(mapping):
        fields = old_line.split(',')
        if fields[0].strip() != insn:
            continue
        # when the mnemonic is only a guess, trust the old mapping instead
        mapped = fields[1].strip() if mnem_can_be_wrong else guessed
        print(ENTRY_TEMPLATE % (arch, insn_id, mapped, is64bit,
                                mapping[i + 2].strip()))
        return

    print(ENTRY_TEMPLATE % (arch, insn_id, guessed, is64bit, DEFAULT_DETAIL))


def _special_mnemonic(insn):
    """Return the hard-coded mnemonic for insn, or None if no rule applies."""
    mnem = MNEMONIC_OVERRIDES.get(insn)
    if mnem is not None:
        return mnem
    for prefix in FMA_PREFIXES:
        if insn.startswith(prefix):
            # e.g. VFMADD231PD... -> prefix + "123PD" (5 chars),
            #      VFMADDPD...    -> prefix + "PD"    (2 chars).
            # Slicing (not indexing) so a bare prefix does not raise.
            has_digits = insn[len(prefix):len(prefix) + 1].isdigit()
            return insn[:len(prefix) + (5 if has_digits else 2)]
    return None


def _print_mapping(arch, first_insn, insn_id_list, lines, mapping):
    """Walk the first enum of GenInstrInfo.inc and print one entry per insn.

    Entries are emitted in enum order (the insn ids are in order there),
    starting at the first enum item whose line starts with first_insn and
    stopping at INSTRUCTION_LIST_END.
    """
    enum_count = 0
    meet_insn = False
    last_mnem = None

    for line in lines:
        line = line.strip()
        if not line:
            continue

        if line == 'enum {':
            enum_count += 1
            continue

        # only the first enum in the file is scanned
        if enum_count != 1:
            continue
        if 'INSTRUCTION_LIST_END' in line:
            break

        insn = None
        if meet_insn:
            # enum items
            insn = line.split('=')[0].strip()
            if 'CALLSTACK' in insn or 'TAILJUMP' in insn:
                # pseudo instruction
                insn = None
        elif line.startswith(first_insn):
            insn = line.split('=')[0].strip()
            meet_insn = True

        if not insn:
            continue

        mnem = _special_mnemonic(insn)
        if mnem is None and insn in insn_id_list:
            # mnemonic taken from the GenAsmMatcher table
            mnem = insn_id_list[insn].upper()

        if mnem is None:
            # the last option when we cannot find mnem: use the last good mnem
            print_entry(arch, insn, last_mnem, mapping, True)
        else:
            last_mnem = mnem
            print_entry(arch, insn, mnem, mapping, False)


def main(argv=None):
    """Generate MappingInsn.inc on stdout.

    argv defaults to sys.argv and must hold the program name followed by
    the paths of GenAsmMatcher.inc, GenInstrInfo.inc and the old
    MappingInsn.inc.  Exits with status 1 when arguments are missing or
    when GenAsmMatcher.inc has no MatchTable1 rows.
    """
    if argv is None:
        argv = sys.argv

    # all three paths are required (argv[3] is read below)
    if len(argv) < 4:
        print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> MappingInsn.inc" % argv[0])
        sys.exit(1)

    with open(argv[3]) as f:
        mapping = f.readlines()

    print(FILE_HEADER)

    arch, first_insn, insn_id_list = extract_matcher(argv[1])
    if arch is None:
        sys.stderr.write("Error: no MatchTable1 entries found in %s\n" % argv[1])
        sys.exit(1)

    # extract from GenInstrInfo.inc, because the insn id is in order
    with open(argv[2]) as f:
        lines = f.readlines()

    _print_mapping(arch.upper(), first_insn, insn_id_list, lines, mapping)


if __name__ == "__main__":
    main()