# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with correct options needed to demangle
# symbols in the object file, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.

from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

# Parsed command line (argparse.Namespace).  Set in the __main__ block and
# read by go().
args = None


def parse_line(line):
    """Split one line of objdump output into (mangled, demangled).

    The mangled name is taken to start at the first '?' on the line and to run
    up to the next '('; the demangled text is whatever sits between that '('
    and the last ')' on the line.  Both pieces are returned stripped of
    surrounding whitespace.

    Returns (None, None) when the line has no '?', no '(' after the '?', or no
    ')' after that '('.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(')', open_paren)
    # A missing ')' must be rejected here; slicing with -1 would otherwise
    # silently drop the last character of the line.
    if close_paren == -1:
        return None, None
    mangled = line[question:open_paren]
    demangled = line[open_paren + 1:close_paren]
    return mangled.strip(), demangled.strip()


class Result(object):
    """Statistics for one object file, one directory, or the whole run."""

    def __init__(self):
        # Paths of object files for which objdump exited with non-zero status.
        self.crashed = []
        # The object file or directory this result describes.
        self.file = None
        # Number of symbol lines that parse_line() recognised.
        self.nsymbols = 0
        # Mangled names that objdump reported as "invalid mangled name".
        self.errors = set()
        # Number of object files accounted for in this result.
        self.nfiles = 0


class MapContext(object):
    """Mutable state shared between successive process_one_chunk() calls."""

    def __init__(self):
        # Partial Result of the directory at the head of pending_objs when a
        # chunk boundary fell in the middle of that directory, else None.
        self.rincomplete = None
        # Sum of the results of every directory that has been fully processed.
        self.rcumulative = Result()
        # Queue of (directory, [object file paths]) still to be processed.
        self.pending_objs = []
        # Total number of object file paths held in pending_objs.
        self.npending = 0


def process_file(path, objdump):
    """Dump the symbol table of `path` with `objdump` and tally the outcome.

    Returns a Result whose `file` is `path`.  If objdump exits with a non-zero
    status the file is recorded in `crashed` and nothing else is counted.
    Otherwise every output line accepted by parse_line() counts as a symbol,
    and symbols whose demangled text contains "invalid mangled name" are
    added to `errors`.
    """
    r = Result()
    r.file = path

    # `--demangle` is the documented spelling of the option.
    popen_args = [objdump, '-t', '--demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Symbol tables can contain arbitrary bytes; a strict decode would raise
    # and abort the whole run instead of just mangling one line.
    output = stdout.decode('utf-8', errors='replace')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r


def add_results(r1, r2):
    """Accumulate the counters and collections of `r2` into `r1` in place."""
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles


def print_result_row(directory, result):
    """Print the one-line status summary for `directory`."""
    print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, len(result.crashed), len(result.errors),
        result.nsymbols, directory))


def process_one_chunk(pool, chunk_size, objdump, context):
    """Process up to `chunk_size` pending object files on `pool`.

    Files are taken from the front of `context.pending_objs`, directory by
    directory.  Every directory whose files were all consumed is folded into
    `context.rcumulative` and gets a status row printed.  If the chunk ends in
    the middle of a directory, that directory's partial Result is parked in
    `context.rincomplete` and picked up again by the next call.
    """
    objs = []
    # obj_dirs[i] is the directory objs[i] was queued under.  Tracking this
    # explicitly avoids re-deriving it with os.path.dirname(), which does not
    # round-trip for files directly under a relative root such as '.'
    # (normpath('./a.o') == 'a.o', whose dirname is '' rather than '.').
    obj_dirs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)
        if context.rincomplete is not None:
            # Resume the directory that the previous chunk left unfinished.
            dir_result = context.rincomplete
            context.rincomplete = None
        else:
            dir_result = Result()

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    # pool.map() returns results in the same order as its inputs, so they can
    # be paired back up with obj_dirs positionally.
    mapped_results = list(pool.map(copier, objs))

    for obj_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[obj_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be
    # the last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did*
    # complete.
    for c in ordered_dirs:
        dir_result = dir_results[c]
        add_results(context.rcumulative, dir_result)
        print_result_row(c, dir_result)


def process_pending_files(pool, chunk_size, objdump, context):
    """Run full chunks until fewer than `chunk_size` files remain pending."""
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)


def _ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 for an empty total."""
    if denominator == 0:
        return 0.0
    return float(numerator) / float(denominator)


def go(pool_size=48):
    """Walk `args.dir`, demangle every matching object file, print a report.

    Reads the module-level `args` namespace (`dir`, `extensions`, `objdump`).
    `pool_size` is both the number of worker processes and the number of
    object files handed to the pool per chunk.
    """
    obj_dir = args.dir

    # Normalise the extension list to a set of '.ext' strings, ignoring empty
    # entries such as the one produced by a trailing comma.
    extensions = set()
    for ext in args.extensions.split(','):
        ext = ext.strip()
        if not ext:
            continue
        extensions.add(ext if ext.startswith('.') else '.' + ext)

    # With no --objdump given, pass the bare tool name so that process
    # creation resolves it through PATH, as the option's help text promises.
    objdump = args.objdump or 'llvm-objdump'

    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                ext = os.path.splitext(f)[1]
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.normpath(os.path.join(root, f))
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, objdump, context)

        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        # An empty tree (or one with no symbols) must still yield a summary
        # rather than a ZeroDivisionError.
        spct = _ratio(nsuccess, total.nsymbols)
        fpct = _ratio(nfailed, total.nsymbols)
        cpct = _ratio(ncrashed, nfiles)
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(
            nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(
            nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(
            ncrashed, cpct))

    except Exception:
        # Best effort: report the failure but still shut the pool down below.
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()


if __name__ == "__main__":
    # Must run before anything else in the main module so that frozen Windows
    # executables can bootstrap their worker processes.
    multiprocessing.freeze_support()

    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(
        description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str,
                        help='the root directory at which to start crawling')
    parser.add_argument('--objdump', type=str,
                        help='path to llvm-objdump.  If not specified ' +
                             'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                             'By default this will be `obj` on Windows and `o` otherwise.')

    args = parser.parse_args()

    go()