1# Given a path to llvm-objdump and a directory tree, spider the directory tree
2# dumping every object file encountered with correct options needed to demangle
3# symbols in the object file, and collect statistics about failed / crashed
4# demanglings.  Useful for stress testing the demangler against a large corpus
5# of inputs.
6
7from __future__ import print_function
8
9import argparse
10import functools
11import os
12import re
13import sys
14import subprocess
15import traceback
16from multiprocessing import Pool
17import multiprocessing
18
# Parsed command-line arguments.  Assigned by the `__main__` block at the
# bottom of this file and read by go().
args = None
20
def parse_line(line):
    """Split one line of objdump output into (mangled, demangled).

    The mangled name is taken to start at the first '?' on the line and to
    run up to the first '(' that follows it.  The demangled text is whatever
    lies between that '(' and the last ')' on the line.  Both pieces are
    returned with surrounding whitespace stripped.

    Returns (None, None) when the line has no '?', no '(' after the '?', or
    no ')' after that '('.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None

    # Search from the right so that parentheses nested inside the demangled
    # text (e.g. a parameter list) stay part of the result.
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        # Without this check the slice below would use -1 as its end index
        # and silently drop the last character of the line.
        return None, None

    mangled = line[question:open_paren]
    demangled = line[open_paren + 1:close_paren]
    return mangled.strip(), demangled.strip()
35
class Result(object):
    """Statistics gathered for one object file or one directory."""

    def __init__(self):
        # Path of the object file, or of the directory being summarised.
        self.file = None
        # Number of object files counted and number of symbols seen.
        self.nfiles = 0
        self.nsymbols = 0
        # Paths of files for which objdump exited with a non-zero status.
        self.crashed = []
        # Mangled names that objdump reported it could not demangle.
        self.errors = set()
43
class MapContext(object):
    """Mutable state carried across successive chunks of work."""

    def __init__(self):
        # Queue of (directory, [object paths]) tuples still to be processed,
        # and the total number of object paths held in that queue.
        self.pending_objs = []
        self.npending = 0
        # Running totals over every directory that has been fully processed.
        self.rcumulative = Result()
        # Partial Result for a directory that was split across a chunk
        # boundary, or None when no directory is half-finished.
        self.rincomplete = None
50
def process_file(path, objdump):
    """Run objdump on a single object file and summarise the outcome.

    `path` is the object file to dump and `objdump` is the executable to
    invoke; it is run as `objdump -t -demangle path`.

    Returns a Result whose `file` is `path`.  If objdump exits with a
    non-zero status the file is recorded in `crashed` and nothing else is
    filled in.  Otherwise `nsymbols` counts every output line that
    parse_line() recognises, and `errors` holds the mangled names whose
    demangled text contains "invalid mangled name".
    """
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only so that it does not clutter our own output.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # The dump may contain bytes that are not valid UTF-8.  Substitute them
    # instead of raising, so that a single odd symbol cannot abort the whole
    # run from inside a worker process.
    output = stdout.decode('utf-8', 'replace')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r
72
def add_results(r1, r2):
    """Fold the statistics of `r2` into `r1` in place; `r2` is not modified."""
    # Counters first, then the collections.
    r1.nfiles = r1.nfiles + r2.nfiles
    r1.nsymbols = r1.nsymbols + r2.nsymbols
    r1.errors.update(r2.errors)
    r1.crashed.extend(r2.crashed)
78
def print_result_row(directory, result):
    """Print a one-line summary of `result`, labelled with `directory`."""
    ncrashes = len(result.crashed)
    nerrors = len(result.errors)
    row = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, ncrashes, nerrors, result.nsymbols, directory)
    print(row)
82
def process_one_chunk(pool, chunk_size, objdump, context):
    """Process up to `chunk_size` pending object files in parallel.

    Object paths are taken from the front of `context.pending_objs`,
    directory by directory, until `chunk_size` paths have been gathered or
    nothing is pending.  They are run through process_file() via
    `pool.map`, and each per-file result is merged into the Result of the
    directory it came from.

    Every directory that was fully consumed is added to
    `context.rcumulative` and gets a status row printed.  If the chunk ended
    part-way through a directory, that directory's partial Result is stored
    in `context.rincomplete` so that the next call can continue it.
    """
    objs = []
    # obj_dirs[i] is the directory that objs[i] was queued under.  Tracking
    # it explicitly avoids re-deriving the key with os.path.dirname(), which
    # yields '' rather than '.' for files directly under a root of '.'.
    obj_dirs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)

        # Resume the half-finished directory left by the previous chunk, if
        # any; it is always the one at the front of the queue.
        if context.rincomplete is not None:
            dir_result = context.rincomplete
            context.rincomplete = None
        else:
            dir_result = Result()

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            # Leave the untaken remainder at the front of the queue.
            context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    # pool.map returns results in the same order as its inputs, so they can
    # be paired back up with obj_dirs.
    mapped_results = pool.map(copier, objs)

    for owner_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[owner_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        completed = dir_results[c]
        add_results(context.rcumulative, completed)
        print_result_row(c, completed)
136
def process_pending_files(pool, chunk_size, objdump, context):
    """Process whole chunks until fewer than `chunk_size` objects are pending."""
    while True:
        if context.npending < chunk_size:
            return
        process_one_chunk(pool, chunk_size, objdump, context)
140
def _fraction(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 if denominator is 0."""
    if denominator == 0:
        return 0.0
    return float(numerator) / float(denominator)


def go():
    """Walk the tree named by the global `args` and report demangling results.

    Every file under `args.dir` whose extension is listed in
    `args.extensions` is queued and processed in parallel.  A status row is
    printed per directory, followed by the lists of failed symbols and
    crashed files and a final summary.

    Exceptions raised while processing are printed as a traceback rather
    than propagated; the worker pool is shut down in every case.
    """
    global args

    obj_dir = args.dir
    # Normalise to a leading dot and skip empty entries, so that input such
    # as "o," or "o, obj" does not raise or produce unmatchable extensions.
    extensions = [x.strip() for x in args.extensions.split(',')]
    extensions = [x if x.startswith('.') else '.' + x for x in extensions if x]

    # --objdump is optional.  Fall back to the bare tool name so that the
    # subprocess launch resolves it through PATH.
    objdump = args.objdump or 'llvm-objdump'

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, _dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                ext = os.path.splitext(f)[1]
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.normpath(os.path.join(root, f))
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, objdump, context)

        assert context.npending < pool_size
        # Flush whatever is left over in one final, possibly short, chunk.
        process_one_chunk(pool, pool_size, objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        # Guard the ratios: a tree with no matching files or no symbols
        # would otherwise raise ZeroDivisionError here.
        spct = _fraction(nsuccess, total.nsymbols)
        fpct = _fraction(nfailed, total.nsymbols)
        cpct = _fraction(ncrashed, nfiles)
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except Exception:
        # Report the failure but still fall through to the pool shutdown.
        # KeyboardInterrupt and SystemExit are deliberately not caught here.
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
211
if __name__ == "__main__":
    # Must be the first statement after the __main__ guard so that, in a
    # frozen Windows executable, worker processes are handed off before any
    # argument parsing happens.  It has no effect otherwise.
    multiprocessing.freeze_support()

    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    # Defaulting to the bare tool name lets the subprocess launch find it on
    # PATH, which is the behaviour the help text describes.
    parser.add_argument('--objdump', type=str, default='llvm-objdump',
                        help='path to llvm-objdump.  If not specified ' +
                        'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                        'By default this will be `obj` on Windows and `o` otherwise.')

    # go() reads its configuration from this module-level name.
    args = parser.parse_args()

    go()
228
229