#!/usr/bin/python
# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
# by Nguyen Anh Quynh, 2019

import sys

# Banner emitted at the top of the generated file.
HEADER = """/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
"""

# Instruction ids whose mnemonic is hard-coded instead of being taken from
# the matcher table.
# NOTE(review): 'CMOVP_Fp3' looks like a truncated 'CMOVP_Fp32'; it is kept
# verbatim here so the generated output does not change -- confirm against
# GenInstrInfo.inc before correcting it.
FIXED_MNEMONICS = {
    'BSWAP16r_BAD': 'BSWAP',
    'CMOVNP_Fp32': 'FCMOVNP',
    'CMOVP_Fp3': 'FCMOVP',
    'CMPSDrm_Int': 'CMPSD',
    'MOVSX16rm16': 'MOVSX',
    'MOVZX16rm16': 'MOVZX',
    'ST_Fp32m': 'FST',
    'CMOVNP_Fp64': 'FCMOVNU',
    'CMPSDrr_Int': 'CMPSD',
    'CMPSSrm_Int': 'CMPSS',
    'VCMPSDrm_Int': 'VCMPSD',
    'VCMPSSrm_Int': 'VCMPSS',
    'VPCMOVYrrr_REV': 'VPCMOV',
    'VRNDSCALESDZm': 'VRNDSCALESD',
    'VRNDSCALESSZm': 'VRNDSCALESS',
    'VMAXCPDZ128rm': 'VMAXPD',
    'VMAXCPSZ128rm': 'VMAXPS',
    'VMAXCSDZrm': 'VMAXSD',
    'VMAXCSSZrm': 'VMAXSS',
    'VMINCPDZ128rm': 'VMINPD',
    'VMINCPSZ128rm': 'VMINPS',
    'VMINCSDZrm': 'VMINSD',
    'VMINCSSZrm': 'VMINSS',
    'VMOV64toPQIZrm': 'VMOVQ',
    'VPERMIL2PDYrr_REV': 'VPERMILPD',
    'VPERMIL2PSYrr_REV': 'VPERMILPS',
    'VCVTSD2SI64Zrm_Int': 'VCVTSD2SI',
    'VCVTSD2SSrm_Int': 'VCVTSD2SS',
    'VCVTSS2SI64Zrm_Int': 'VCVTSS2SI',
    'VCVTTSD2SI64Zrm_Int': 'VCVTTSD2SI',
    'VCVTTSS2SI64Zrm_Int': 'VCVTTSS2SI',
}

# FMA families whose mnemonic is a leading slice of the instruction id.
# Order matters: the longer prefixes must be tried before the shorter
# prefixes they contain (VFMSUBADD before VFMSUB, VFMADDSUB before VFMADD).
FMA_PREFIXES = (
    'VFMSUBADD',
    'VFMADDSUB',
    'VFMADD',
    'VFMSUB',
    'VFNMADD',
    'VFNMSUB',
)


# lib/Target/X86/X86GenAsmMatcher.inc
# static const MatchEntry MatchTable1[] = {
#   { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, },

def extract_insn(line):
    """Parse one (already stripped) MatchTable1 entry.

    Returns a tuple (arch, mnem, insn_id), e.g. ('X86', 'aaa', 'AAA').
    A dotted mnemonic such as X86 'mov.s' is cut at the first dot, and the
    'AArch64' namespace is reported as 'ARM64'.
    """
    fields = line.split(',')
    insn_raw = fields[1].strip()
    # fields[0] looks like '{ 0 /* aaa */': the mnemonic is the 4th token
    insn_mnem = fields[0].split(' ')[3]
    # X86 mov.s
    if '.' in insn_mnem:
        insn_mnem = insn_mnem.split('.')[0]
    arch, insn_id = insn_raw.split('::')[:2]
    # AArch64 -> ARM64
    if arch.upper() == 'AARCH64':
        arch = 'ARM64'
    return (arch, insn_mnem, insn_id)


def extract_matcher(filename):
    """Collect instruction ids from the first MatchTable1 in GenAsmMatcher.inc.

    Returns (arch, first_insn, insn_id_list) where insn_id_list maps each
    instruction id to the mnemonic of its first occurrence in the table.
    arch and first_insn come from the first table entry; both are None when
    the file contains no 'MatchEntry MatchTable1[] = {' line.
    """
    with open(filename) as f:
        lines = f.readlines()

    match_count = 0
    insn_id_list = {}
    arch = None
    first_insn = None

    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        if 'MatchEntry MatchTable1[] = {' in line:
            match_count += 1
            continue

        # only the body of the first table is of interest
        if match_count != 1:
            continue

        if line == '};':
            # done with the table
            break

        _arch, mnem, insn_id = extract_insn(line)
        if arch is None:
            # remember where the table starts
            arch, first_insn = _arch, insn_id

        # keep the mnemonic of the first entry seen for each instruction id
        insn_id_list.setdefault(insn_id, mnem)

    return arch, first_insn, insn_id_list


#{	/* X86_AAA, X86_INS_AAA: aaa */
#	X86_EFLAGS_UNDEFINED_OF | X86_EFLAGS_UNDEFINED_SF | X86_EFLAGS_UNDEFINED_ZF | X86_EFLAGS_MODIFY_AF | X86_EFLAGS_UNDEFINED_PF | X86_EFLAGS_MODIFY_CF,
#	{ 0 }
#},
def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print one mapping entry for instruction <arch>_<insn_id>.

    mapping holds the lines of the old mapping file.  When it already has
    an entry for this instruction, the two lines following the entry header
    are carried over; with mnem_can_be_wrong set, the old header comment is
    reused too instead of being rebuilt from mnem.  Otherwise a default
    entry ('0,' and '{ 0 }') is printed.
    """
    insn = "%s_%s" % (arch, insn_id)

    # first, try to find this entry in old MappingInsn.inc file
    for i, entry in enumerate(mapping):
        if not (entry.startswith('{') and '/*' in entry):
            continue
        fields = entry.split('/*')[1].strip().split(',')
        if fields[0].strip() != insn:
            continue
        if i + 2 >= len(mapping):
            # old entry is truncated at end of file: treat it as missing
            break

        if not mnem_can_be_wrong:
            print('''
{\t/* %s, %s_INS_%s: %s */
\t%s
\t%s
},''' % (insn, arch, mnem, mnem.lower(), mapping[i + 1].strip(), mapping[i + 2].strip()))
        else:
            # mnem is only a guess: keep whatever the old header said
            print('''
{\t/* %s, %s
\t%s
\t%s
},''' % (insn, ''.join(fields[1:]).strip(), mapping[i + 1].strip(), mapping[i + 2].strip()))
        return

    print('''
{\t/* %s, %s_INS_%s: %s */
\t0,
\t{ 0 }
},''' % (insn, arch, mnem, mnem.lower()))


def resolve_special_mnemonic(insn):
    """Return the hard-coded mnemonic for insn, or None if it has none.

    Covers the fixed overrides in FIXED_MNEMONICS and the FMA families in
    FMA_PREFIXES, whose mnemonic is the prefix plus either three digits and
    a two-letter suffix (e.g. VFMADD231PD) or just a two-letter suffix
    (e.g. VFMADDPD).
    """
    fixed = FIXED_MNEMONICS.get(insn)
    if fixed is not None:
        return fixed

    for prefix in FMA_PREFIXES:
        if insn.startswith(prefix):
            plen = len(prefix)
            # slice rather than index, so a bare prefix cannot raise IndexError
            extra = 5 if insn[plen:plen + 1].isdigit() else 2
            return insn[:plen + extra]

    return None


def print_mapping(lines, arch, first_insn, insn_id_list, mapping):
    """Print one entry per instruction of the first enum in GenInstrInfo.inc.

    lines are the lines of GenInstrInfo.inc; its first 'enum {' is walked
    in order, because the insn ids are in order there.  Output starts at
    the first item whose line starts with first_insn and stops at
    INSTRUCTION_LIST_END.
    """
    enum_count = 0
    meet_insn = False
    last_mnem = None

    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        if line == 'enum {':
            enum_count += 1
            continue

        if enum_count != 1:
            continue

        if 'INSTRUCTION_LIST_END' in line:
            break

        insn = None
        if meet_insn:
            # enum items
            insn = line.split('=')[0].strip()
            if 'CALLSTACK' in insn or 'TAILJUMP' in insn:
                # pseudo instruction
                insn = None
        elif line.startswith(first_insn):
            insn = line.split('=')[0].strip()
            meet_insn = True

        if not insn:
            continue

        special = resolve_special_mnemonic(insn)
        if special is not None:
            last_mnem = special
            print_entry(arch, insn, last_mnem, mapping, False)
        elif insn in insn_id_list:
            # trust the matcher table
            last_mnem = insn_id_list[insn].upper()
            print_entry(arch, insn, last_mnem, mapping, False)
        else:
            # the last option when we cannot find mnem: use the last good mnem
            print_entry(arch, insn, last_mnem, mapping, True)


def main(argv):
    """Run the generator; returns the process exit status."""
    # all three input files are required
    if len(argv) < 4:
        print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> <MappingInsnOp.inc>" % argv[0])
        return 1

    with open(argv[3]) as f:
        mapping = f.readlines()

    print(HEADER)

    arch, first_insn, insn_id_list = extract_matcher(argv[1])
    if arch is None:
        sys.stderr.write("Error: no MatchTable1 found in %s\n" % argv[1])
        return 1
    arch = arch.upper()

    with open(argv[2]) as f:
        lines = f.readlines()

    print_mapping(lines, arch, first_insn, insn_id_list, mapping)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))