1#!/usr/bin/python
# Print the list of instructions found in LLVM .inc files, for the Capstone disassembler.
3# this will be put into capstone/<arch>.h
4# by Nguyen Anh Quynh, 2019
5
6import sys
7
# Banner written verbatim at the top of the generated output.
_BANNER = """/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
"""

# Only the script name on the command line: no input file was given,
# so show the usage line and exit with status 1.
if len(sys.argv) == 1:
    _usage = "Syntax: %s <GenAsmMatcher.inc>" % sys.argv[0]
    print(_usage)
    sys.exit(1)

print(_BANNER)
16
17# lib/Target/X86/X86GenAsmMatcher.inc
18# static const MatchEntry MatchTable1[] = {
19#  { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },
20
21# extract insn from GenAsmMatcher Table
22# return (arch, mnem, insn_id)
def extract_insn(line):
    """Parse one MatchTable entry into ``(arch, mnemonic, insn_id)``.

    ``line`` is a single entry of the GenAsmMatcher table, for example::

        { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, {  }, },

    The mnemonic is the fourth whitespace-separated token of the first
    comma-separated field, cut at its first '.' (X86 "mov.s" -> "mov").
    The second field, ``<arch>::<insn_id>``, supplies the other two values;
    an "AArch64" arch (compared case-insensitively) is returned as "ARM64".

    Raises IndexError if the line has fewer fields or tokens than expected.
    """
    fields = line.split(',')
    insn_raw = fields[1].strip()

    # split() with no argument ignores leading indentation and runs of
    # spaces/tabs, so a raw (unstripped) table line yields the same tokens
    # as a stripped one: '{', index, '/*', mnemonic, '*/'.
    insn_mnem = fields[0].split()[3]

    # X86 mov.s: keep only the part before the first dot.
    insn_mnem = insn_mnem.split('.')[0]

    qualified = insn_raw.split('::')
    arch = qualified[0]
    # AArch64 -> ARM64
    if arch.upper() == 'AARCH64':
        arch = 'ARM64'
    return (arch, insn_mnem, qualified[1])
37
38
39
40# extract all insn lines from GenAsmMatcher
41# return arch, first_insn, insn_id_list
def extract_matcher(filename):
    """Scan a GenAsmMatcher.inc file and list the instructions of its match table.

    The table header searched for is 'MatchEntry MatchTable1[] = {' when the
    file contains it, otherwise 'MatchEntry MatchTable0[] = {'.  Every
    non-blank line between the first occurrence of that header and the
    closing '};' is parsed with extract_insn().

    Entries whose mnemonic starts with '__' (pseudo instructions) are
    skipped, and a trailing '-' or '+' on a mnemonic (PPC) is dropped.
    As a side effect, one line "<arch>_INS_<MNEMONIC>," is printed to
    stdout the first time each mnemonic is seen, using the arch of the
    first kept entry.

    Returns a tuple (arch, first_insn, insn_id_list):
      arch         -- arch of the first kept entry, or None if there is none
      first_insn   -- instruction id of the first kept entry, or None
      insn_id_list -- dict mapping each instruction id to the first mnemonic
                      seen for it

    Raises ValueError if the file contains neither table header.
    """
    # Context manager closes the file even if reading fails.
    with open(filename) as f:
        lines = f.readlines()

    table0 = 'MatchEntry MatchTable0[] = {'
    table1 = 'MatchEntry MatchTable1[] = {'

    # Pick the table to parse: MatchTable1 wins as soon as it is found,
    # MatchTable0 is the fallback.
    pattern = None
    for line in lines:
        if table0 in line:
            pattern = table0
        elif table1 in line:
            pattern = table1
            # last pattern, done
            break

    if pattern is None:
        # Without this check the substring test below would fail with an
        # unhelpful TypeError ("'in <string>' requires string ...").
        raise ValueError(
            "no MatchTable0/MatchTable1 found in %s" % filename)

    match_count = 0
    seen_mnems = set()      # mnemonics already printed
    insn_id_list = {}
    arch = None
    first_insn = None

    for line in lines:
        line = line.strip()
        if not line:
            continue

        if pattern in line:
            match_count += 1
            continue

        # Only the body of the first occurrence of the table is parsed.
        if match_count != 1:
            continue

        if line == '};':
            # end of the table
            break

        _arch, mnem, insn_id = extract_insn(line)

        # skip pseudo instructions
        if mnem.startswith('__'):
            continue

        # PPC
        if mnem.endswith(('-', '+')):
            mnem = mnem[:-1]

        if not first_insn:
            arch, first_insn = _arch, insn_id

        # keep the first mnemonic recorded for each instruction id
        insn_id_list.setdefault(insn_id, mnem)

        if mnem not in seen_mnems:
            print("%s_INS_%s," % (arch, mnem.upper()))
            seen_mnems.add(mnem)

    return arch, first_insn, insn_id_list
101
# Parse the GenAsmMatcher.inc file named as the first command-line argument.
matcher_inc_path = sys.argv[1]
arch, first_insn, insn_id_list = extract_matcher(matcher_inc_path)
105