#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
"""
Example of use:
  asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log

PLUGINS

This script provides a way for external plug-ins to hook into the behaviour of
various parts of this script (see `--plugins`). This is useful for situations
where it is necessary to handle site-specific quirks (e.g. binaries with debug
symbols only accessible via a remote service) without having to modify the
script itself.

"""
import argparse
import bisect
import errno
import getopt
import logging
import os
import re
import subprocess
import sys
try:
  from distutils.spawn import find_executable
except ImportError:
  # distutils was removed in Python 3.12; shutil.which has the same
  # "path or None" contract for a single command name.
  from shutil import which as find_executable

symbolizers = {}
demangle = False
binutils_prefix = None
fix_filename_patterns = None
logfile = sys.stdin
allow_system_symbolizer = True
force_system_symbolizer = False

# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Normalize a `file:line` string produced by a symbolizer.

  Strips every prefix listed in the global `fix_filename_patterns`, collapses
  ASan runtime sources to `_asan_rtl_` and maps `crtstuff.c:0` to `???:0`.
  """
  if fix_filename_patterns:
    for path_to_cut in fix_filename_patterns:
      file_name = re.sub('.*' + path_to_cut, '', file_name)
  file_name = re.sub('.*asan_[a-z_]*.(cc|cpp):[0-9]*', '_asan_rtl_', file_name)
  file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
  return file_name

def is_valid_arch(s):
  """Return True if `s` is one of the architecture names this script knows."""
  return s in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7", "armv7s",
               "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390",
               "riscv64"]

def guess_arch(addr):
  """Guess the architecture from the textual width of the address `addr`."""
  # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
  if len(addr) > 10:
    return 'x86_64'
  else:
    return 'i386'

class Symbolizer(object):
  """Base class of all symbolizers; `symbolize` is overridden in subclasses."""
  def __init__(self):
    pass

  def symbolize(self, addr, binary, offset):
    """Symbolize the given address (pair of binary and offset).

    Overridden in subclasses.
    Args:
        addr: virtual address of an instruction.
        binary: path to executable/shared object containing this instruction.
        offset: instruction offset in the @binary.
    Returns:
        list of strings (one string for each inlined frame) describing
        the code locations for this instruction (that is, function name, file
        name, line and column numbers).
    """
    return None


class LLVMSymbolizer(Symbolizer):
  """Symbolizer that talks to a long-running llvm-symbolizer subprocess."""
  def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    self.default_arch = default_arch
    self.system = system
    # Avoid a shared mutable default; a caller-supplied container is kept
    # by reference exactly as before.
    self.dsym_hints = [] if dsym_hints is None else dsym_hints
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Start llvm-symbolizer; returns the Popen object or None on OSError."""
    cmd = [self.symbolizer_path,
           ('--demangle' if demangle else '--no-demangle'),
           '--functions=linkage',
           '--inlines',
           '--default-arch=%s' % self.default_arch]
    if self.system == 'Darwin':
      for hint in self.dsym_hints:
        cmd.append('--dsym-hint=%s' % hint)
    logging.debug(' '.join(cmd))
    try:
      result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)
    except OSError:
      result = None
    return result

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if not self.pipe:
      return None
    result = []
    try:
      symbolizer_input = '"%s" %s' % (binary, offset)
      logging.debug(symbolizer_input)
      self.pipe.stdin.write("%s\n" % symbolizer_input)
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = self.pipe.stdout.readline().rstrip()
        file_name = fix_filename(file_name)
        if (not function_name.startswith('??') or
            not file_name.startswith('??')):
          # Append only non-trivial frames.
          result.append('%s in %s %s' % (addr, function_name,
                                         file_name))
    except Exception:
      # Best effort: any failure means "no result" so that the caller can
      # fall back to the next symbolizer in the chain.
      logging.debug('llvm-symbolizer communication failed', exc_info=True)
      result = []
    if not result:
      result = None
    return result


def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
  """Create an LLVMSymbolizer.

  The binary is taken from $LLVM_SYMBOLIZER_PATH, then $ASAN_SYMBOLIZER_PATH,
  and finally `llvm-symbolizer` is assumed to be in PATH.
  """
  symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
  if not symbolizer_path:
    symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
    if not symbolizer_path:
      # Assume llvm-symbolizer is in PATH.
      symbolizer_path = 'llvm-symbolizer'
  return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)


class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer backed by an `addr2line -fi` subprocess for one binary."""
  def __init__(self, binary):
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()
    # Written after every query; its echoed answer marks the end of output.
    self.output_terminator = -1

  def open_addr2line(self):
    """Start addr2line (prefixed by `binutils_prefix` if set) on the binary."""
    addr2line_tool = 'addr2line'
    if binutils_prefix:
      addr2line_tool = binutils_prefix + addr2line_tool
    logging.debug('addr2line binary is %s' % find_executable(addr2line_tool))
    cmd = [addr2line_tool, '-fi']
    if demangle:
      cmd += ['--demangle']
    cmd += ['-e', self.binary]
    logging.debug(' '.join(cmd))
    return subprocess.Popen(cmd,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            bufsize=0,
                            universal_newlines=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if self.binary != binary:
      return None
    lines = []
    try:
      self.pipe.stdin.write("%s\n" % offset)
      self.pipe.stdin.write("%s\n" % self.output_terminator)
      is_first_frame = True
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        logging.debug("read function_name='%s' from addr2line" % function_name)
        # If llvm-symbolizer is installed as addr2line, older versions of
        # llvm-symbolizer will print -1 when presented with -1 and not print
        # a second line. In that case we will block for ever trying to read the
        # file name. This also happens for non-existent files, in which case GNU
        # addr2line exits immediately, but llvm-symbolizer does not (see
        # https://llvm.org/PR42754).
        if function_name == '-1':
          logging.debug("got function '-1' -> no more input")
          break
        file_name = self.pipe.stdout.readline().rstrip()
        logging.debug("read file_name='%s' from addr2line" % file_name)
        if is_first_frame:
          is_first_frame = False
        elif function_name == '??':
          assert file_name == '??:0', file_name
          logging.debug("got function '??' -> no more input")
          break
        elif not function_name:
          assert not file_name, file_name
          logging.debug("got empty function name -> no more input")
          break
        if not function_name and not file_name:
          logging.debug("got empty function and file name -> unknown function")
          function_name = '??'
          file_name = '??:0'
        lines.append((function_name, file_name))
    except IOError as e:
      # EPIPE happens if addr2line exits early (which some implementations do
      # if an invalid file is passed).
      if e.errno == errno.EPIPE:
        # poll() may return None (child still running), so format with %s.
        logging.debug("addr2line exited early (broken pipe), returncode=%s" % self.pipe.poll())
      else:
        logging.debug("unexpected I/O exception communicating with addr2line", exc_info=e)
      lines.append(('??', '??:0'))
    except Exception as e:
      logging.debug("got unknown exception communicating with addr2line", exc_info=e)
      lines.append(('??', '??:0'))
    return ['%s in %s %s' % (addr, function, fix_filename(file)) for (function, file) in lines]

class UnbufferedLineConverter(object):
  """
  Wrap a child process that responds to each line of input with one line of
  output.  Uses pty to trick the child into providing unbuffered output.
  """
  def __init__(self, args, close_stderr=False):
    # Local imports so that the script can start on Windows.
    import pty
    import termios
    pid, fd = pty.fork()
    if pid == 0:
      # We're the child. Transfer control to command.
      if close_stderr:
        dev_null = os.open('/dev/null', 0)
        os.dup2(dev_null, 2)
      os.execvp(args[0], args)
    else:
      # Disable echoing.
      attr = termios.tcgetattr(fd)
      attr[3] = attr[3] & ~termios.ECHO
      termios.tcsetattr(fd, termios.TCSANOW, attr)
      # Set up a file()-like interface to the child process
      self.r = os.fdopen(fd, "r", 1)
      self.w = os.fdopen(os.dup(fd), "w", 1)

  def convert(self, line):
    """Send `line` to the child and return its next line of output."""
    self.w.write(line + "\n")
    return self.readline()

  def readline(self):
    """Read one line from the child, with trailing whitespace removed."""
    return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
  """Symbolizer backed by an `atos` child process for one binary/arch."""
  def __init__(self, addr, binary, arch):
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    self.arch = arch
    self.open_atos()

  def open_atos(self):
    """Start `atos` for this binary and architecture."""
    logging.debug('atos -o %s -arch %s', self.binary, self.arch)
    cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
    self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if self.binary != binary:
      return None
    if not os.path.exists(binary):
      # If the binary doesn't exist atos will exit which will lead to IOError
      # exceptions being raised later on so just don't try to symbolize.
      return ['{} ({}:{}+{})'.format(addr, binary, self.arch, offset)]
    atos_line = self.atos.convert('0x%x' % int(offset, 16))
    while "got symbolicator for" in atos_line:
      atos_line = self.atos.readline()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    # NOTE:
    #   * For C functions atos omits parentheses and argument types.
    #   * For C++ functions the function name (i.e., `foo` above) may contain
    #     templates which may contain parentheses.
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    logging.debug('atos_line: %s', atos_line)
    if match:
      function_name = match.group(1)
      file_name = fix_filename(match.group(3))
      return ['%s in %s %s' % (addr, function_name, file_name)]
    else:
      return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries each symbolizer in order and returns the first truthy result."""
  def __init__(self, symbolizer_list):
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    for symbolizer in self.symbolizer_list:
      # Entries may be None (e.g. no Breakpad symbol file available).
      if symbolizer:
        result = symbolizer.symbolize(addr, binary, offset)
        if result:
          return result
    return None

  def append_symbolizer(self, symbolizer):
    """Add `symbolizer` to the end of the chain."""
    self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
  """Return a BreakpadSymbolizer for `binary` + $BREAKPAD_SUFFIX, or None.

  None is returned when the variable is unset or the file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if suffix:
    filename = binary + suffix
    if os.access(filename, os.F_OK):
      return BreakpadSymbolizer(filename)
  return None


def SystemSymbolizerFactory(system, addr, binary, arch):
  """Return the platform symbolizer (atos or addr2line), or None."""
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary, arch)
  elif system in ['Linux', 'FreeBSD', 'NetBSD', 'SunOS']:
    return Addr2LineSymbolizer(binary)
  return None


class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that answers queries from a parsed Breakpad symbol file."""
  def __init__(self, filename):
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # `file()` only exists in Python 2; open() works everywhere and the
    # context manager closes the handle.
    with open(filename) as sym_file:
      lines = sym_file.readlines()
    self.files = []
    self.symbols = {}
    self.address_list = []
    self.addresses = {}
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Fill `files`, `symbols`, `addresses` and `address_list` from `lines`."""
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to record.
        continue
      if fragments[0] == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif fragments[0] == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif fragments[0] in ['CFI', 'STACK']:
        pass
      elif fragments[0] == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(fragments[0], 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Return (symbol, filename, line) covering `addr`, or None."""
    key = None
    if addr in self.addresses:
      key = addr
    else:
      # Closest record that starts below `addr`.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      else:
        key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    else:
      return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if res:
      function_name, file_name, line_no = res
      result = ['%s in %s %s:%d' % (
          addr, function_name, file_name, line_no)]
      # Diagnostic only; printing here would corrupt the symbolized output.
      logging.debug(result)
      return result
    else:
      return None


class SymbolizationLoop(object):
  """Reads a sanitizer report line by line and symbolizes its stack frames."""
  def __init__(self, plugin_proxy=None, dsym_hint_producer=None):
    self.plugin_proxy = plugin_proxy
    if sys.platform == 'win32':
      # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
      # even in sandboxed processes.  Nothing needs to be done here.
      self.process_line = self.process_line_echo
    else:
      # Used by clients who may want to supply a different binary name.
      # E.g. in Chrome several binaries may share a single .dSYM.
      self.dsym_hint_producer = dsym_hint_producer
      self.system = os.uname()[0]
      if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD','SunOS']:
        raise Exception('Unknown system')
      self.llvm_symbolizers = {}
      self.last_llvm_symbolizer = None
      self.dsym_hints = set([])
      self.frame_no = 0
      self.process_line = self.process_line_posix
      # `plugin_proxy` defaults to None, so it must be checked before use.
      self.using_module_map = (
          plugin_proxy is not None and
          plugin_proxy.has_plugin(ModuleMapPlugIn.get_name()))

  def symbolize_address(self, addr, binary, offset, arch):
    """Return the list of symbolized frames for `addr` in `binary`."""
    # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
    # a single symbolizer binary.
    # On Darwin, if the dsym hint producer is present:
    #  1. check whether we've seen this binary already; if so,
    #     use |llvm_symbolizers[binary]|, which has already loaded the debug
    #     info for this binary (might not be the case for
    #     |last_llvm_symbolizer|);
    #  2. otherwise check if we've seen all the hints for this binary already;
    #     if so, reuse |last_llvm_symbolizer| which has the full set of hints;
    #  3. otherwise create a new symbolizer and pass all currently known
    #     .dSYM hints to it.
    result = None
    if not force_system_symbolizer:
      if binary not in self.llvm_symbolizers:
        use_new_symbolizer = True
        if self.system == 'Darwin' and self.dsym_hint_producer:
          dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
          use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
          self.dsym_hints |= dsym_hints_for_binary
        if self.last_llvm_symbolizer and not use_new_symbolizer:
          self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        else:
          self.last_llvm_symbolizer = LLVMSymbolizerFactory(
              self.system, arch, self.dsym_hints)
          self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
      # Use the chain of symbolizers:
      # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
      # (fall back to next symbolizer if the previous one fails).
      if binary not in symbolizers:
        symbolizers[binary] = ChainSymbolizer(
            [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
      result = symbolizers[binary].symbolize(addr, binary, offset)
    else:
      symbolizers[binary] = ChainSymbolizer([])
    if result is None:
      if not allow_system_symbolizer:
        raise Exception('Failed to launch or use llvm-symbolizer.')
      # Initialize system symbolizer only if other symbolizers failed.
      symbolizers[binary].append_symbolizer(
          SystemSymbolizerFactory(self.system, addr, binary, arch))
      result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

  def get_symbolized_lines(self, symbolized_lines, inc_frame_counter=True):
    """Format symbolized frames, or echo the current line if there are none."""
    if not symbolized_lines:
      if inc_frame_counter:
        self.frame_no += 1
      return [self.current_line]
    else:
      assert inc_frame_counter
      result = []
      for symbolized_frame in symbolized_lines:
        result.append('    #%s %s' % (str(self.frame_no), symbolized_frame.rstrip()))
        self.frame_no += 1
      return result

  def process_logfile(self):
    """Symbolize every line of the global `logfile` and print the result."""
    self.frame_no = 0
    for line in logfile:
      processed = self.process_line(line)
      print('\n'.join(processed))

  def process_line_echo(self, line):
    """Return `line` unchanged apart from trailing whitespace."""
    return [line.rstrip()]

  def process_line_posix(self, line):
    """Symbolize `line` if it is a stack frame, otherwise echo it."""
    self.current_line = line.rstrip()
    # Unsymbolicated:
    # #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
    # Partially symbolicated:
    # #0 0x7f6e35cf2e45 in foo (foo.so+0x11fe45)
    # NOTE: We have to be very liberal with symbol
    # names in the regex because it could be an
    # Objective-C or C++ demangled name.
    stack_trace_line_format = (
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)')
    match = re.match(stack_trace_line_format, line)
    if not match:
      logging.debug('Line "{}" does not match regex'.format(line))
      # Not a frame line so don't increment the frame counter.
      return self.get_symbolized_lines(None, inc_frame_counter=False)
    logging.debug(line)
    _, frameno_str, addr, binary, offset = match.groups()

    if not self.using_module_map and not os.path.isabs(binary):
      # Do not try to symbolicate if the binary is just the module file name
      # and a module map is unavailable.
      # FIXME(dliew): This is currently necessary for reports on Darwin that are
      # partially symbolicated by `atos`.
      return self.get_symbolized_lines(None)
    arch = ""
    # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
    colon_pos = binary.rfind(":")
    if colon_pos != -1:
      maybe_arch = binary[colon_pos+1:]
      if is_valid_arch(maybe_arch):
        arch = maybe_arch
        binary = binary[0:colon_pos]
    if arch == "":
      arch = guess_arch(addr)
    if frameno_str == '0':
      # Assume that frame #0 is the first frame of new stack trace.
      self.frame_no = 0
    original_binary = binary
    if self.plugin_proxy is not None:
      # Without a proxy there are no filters, so the path is used as is.
      binary = self.plugin_proxy.filter_binary_path(binary)
    if binary is None:
      # The binary filter has told us this binary can't be symbolized.
      logging.debug('Skipping symbolication of binary "%s"', original_binary)
      return self.get_symbolized_lines(None)
    symbolized_line = self.symbolize_address(addr, binary, offset, arch)
    if not symbolized_line:
      if original_binary != binary:
        symbolized_line = self.symbolize_address(addr, original_binary, offset, arch)
    return self.get_symbolized_lines(symbolized_line)

class AsanSymbolizerPlugInProxy(object):
  """
  Serves several purposes:
  - Manages the lifetime of plugins (must be used in a `with` statement).
  - Provides interface for calling into plugins from within this script.
  """
  def __init__(self):
    self._plugins = [ ]
    self._plugin_names = set()

  def _load_plugin_from_file_impl_py_gt_2(self, file_path, globals_space):
    """Execute the plug-in file `file_path` with `globals_space` as globals."""
    # NOTE: this runs arbitrary code from the given file by design; only load
    # plug-ins from trusted locations.
    with open(file_path, 'r') as f:
      exec(f.read(), globals_space, None)

  def load_plugin_from_file(self, file_path):
    """Run a plug-in file, exposing `register_plugin()` to it."""
    logging.info('Loading plugins from "{}"'.format(file_path))
    globals_space = dict(globals())
    # Provide function to register plugins
    def register_plugin(plugin):
      logging.info('Registering plugin %s', plugin.get_name())
      self.add_plugin(plugin)
    globals_space['register_plugin'] = register_plugin
    if sys.version_info.major < 3:
      execfile(file_path, globals_space, None)
    else:
      # Indirection here is to avoid a bug in older Python 2 versions:
      # `SyntaxError: unqualified exec is not allowed in function ...`
      self._load_plugin_from_file_impl_py_gt_2(file_path, globals_space)

  def add_plugin(self, plugin):
    """Start managing `plugin` and hand it a reference to this proxy."""
    assert isinstance(plugin, AsanSymbolizerPlugIn)
    self._plugins.append(plugin)
    self._plugin_names.add(plugin.get_name())
    plugin._receive_proxy(self)

  def remove_plugin(self, plugin):
    """Stop managing `plugin` and call its `destroy()` hook."""
    assert isinstance(plugin, AsanSymbolizerPlugIn)
    self._plugins.remove(plugin)
    self._plugin_names.remove(plugin.get_name())
    logging.debug('Removing plugin %s', plugin.get_name())
    plugin.destroy()

  def has_plugin(self, name):
    """
    Returns true iff the plugin name is currently
    being managed by AsanSymbolizerPlugInProxy.
    """
    return name in self._plugin_names

  def register_cmdline_args(self, parser):
    """Let every managed plug-in add its arguments to `parser`."""
    plugins = list(self._plugins)
    for plugin in plugins:
      plugin.register_cmdline_args(parser)

  def process_cmdline_args(self, pargs):
    """Pass `pargs` to every plug-in; remove those that return False."""
    # Use copy so we can remove items as we iterate.
    plugins = list(self._plugins)
    for plugin in plugins:
      keep = plugin.process_cmdline_args(pargs)
      assert isinstance(keep, bool)
      if not keep:
        self.remove_plugin(plugin)

  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    for plugin in self._plugins:
      plugin.destroy()
    # Don't suppress raised exceptions
    return False

  def _filter_single_value(self, function_name, input_value):
    """
    Helper for filter style plugin functions.
    """
    new_value = input_value
    for plugin in self._plugins:
      result = getattr(plugin, function_name)(new_value)
      if result is None:
        return None
      new_value = result
    return new_value

  def filter_binary_path(self, binary_path):
    """
    Consult available plugins to filter the path to a binary
    to make it suitable for symbolication.

    Returns `None` if symbolication should not be attempted for this
    binary.
    """
    return self._filter_single_value('filter_binary_path', binary_path)

  def filter_module_desc(self, module_desc):
    """
    Consult available plugins to determine the module
    description suitable for symbolication.

    Returns `None` if symbolication should not be attempted for this module.
    """
    assert isinstance(module_desc, ModuleDesc)
    return self._filter_single_value('filter_module_desc', module_desc)

class AsanSymbolizerPlugIn(object):
  """
  This is the interface the `asan_symbolize.py` code uses to talk
  to plugins.
  """
  @classmethod
  def get_name(cls):
    """
    Returns the name of the plugin.
    """
    return cls.__name__

  def _receive_proxy(self, proxy):
    assert isinstance(proxy, AsanSymbolizerPlugInProxy)
    self.proxy = proxy

  def register_cmdline_args(self, parser):
    """
    Hook for registering command line arguments to be
    consumed in `process_cmdline_args()`.

    `parser` - Instance of `argparse.ArgumentParser`.
    """
    pass

  def process_cmdline_args(self, pargs):
    """
    Hook for handling parsed arguments. Implementations
    should not modify `pargs`.

    `pargs` - Instance of `argparse.Namespace` containing
    parsed command line arguments.

    Return `True` if plug-in should be used, otherwise
    return `False`.
    """
    return True

  def destroy(self):
    """
    Hook called when a plugin is about to be destroyed.
    Implementations should free any allocated resources here.
    """
    pass

  # Symbolization hooks
  def filter_binary_path(self, binary_path):
    """
    Given a binary path return a binary path suitable for symbolication.

    Implementations should return `None` if symbolication of this binary
    should be skipped.
    """
    return binary_path

  def filter_module_desc(self, module_desc):
    """
    Given a ModuleDesc object (`module_desc`) return
    a ModuleDesc suitable for symbolication.

    Implementations should return `None` if symbolication of this binary
    should be skipped.
    """
    return module_desc

class ModuleDesc(object):
  """Description of one loaded module taken from a module map."""
  def __init__(self, name, arch, start_addr, end_addr, module_path, uuid):
    self.name = name
    self.arch = arch
    self.start_addr = start_addr
    self.end_addr = end_addr
    # Module path from an ASan report.
    self.module_path = module_path
    # Module for performing symbolization, by default same as above.
    self.module_path_for_symbolization = module_path
    self.uuid = uuid
    assert self.is_valid()

  def __str__(self):
    assert self.is_valid()
    return "{name} {arch} {start_addr:#016x}-{end_addr:#016x} {module_path} {uuid}".format(
      name=self.name,
      arch=self.arch,
      start_addr=self.start_addr,
      end_addr=self.end_addr,
      module_path=self.module_path if self.module_path == self.module_path_for_symbolization else '{} ({})'.format(self.module_path_for_symbolization, self.module_path),
      uuid=self.uuid
    )

  def is_valid(self):
    """Return True if every field has the expected type and range."""
    if not isinstance(self.name, str):
      return False
    if not isinstance(self.arch, str):
      return False
    if not isinstance(self.start_addr, int):
      return False
    if self.start_addr < 0:
      return False
    if not isinstance(self.end_addr, int):
      return False
    if self.end_addr <= self.start_addr:
      return False
    if not isinstance(self.module_path, str):
      return False
    if not os.path.isabs(self.module_path):
      return False
    if not isinstance(self.module_path_for_symbolization, str):
      return False
    if not os.path.isabs(self.module_path_for_symbolization):
      return False
    if not isinstance(self.uuid, str):
      return False
    return True

class GetUUIDFromBinaryException(Exception):
  """Raised when the UUID of a binary cannot be determined."""
  def __init__(self, msg):
    super(GetUUIDFromBinaryException, self).__init__(msg)

_get_uuid_from_binary_cache = dict()

def get_uuid_from_binary(path_to_binary, arch=None):
  """Return the LC_UUID of `path_to_binary` as reported by `otool -l`.

  Results are cached per (path, arch). Raises GetUUIDFromBinaryException if
  the binary does not exist or no UUID can be found in the otool output.
  """
  cache_key = (path_to_binary, arch)
  cached_value = _get_uuid_from_binary_cache.get(cache_key)
  if cached_value:
    return cached_value
  if not os.path.exists(path_to_binary):
    raise GetUUIDFromBinaryException('Binary "{}" does not exist'.format(path_to_binary))
  cmd = [ '/usr/bin/otool', '-l']
  if arch:
    cmd.extend(['-arch', arch])
  cmd.append(path_to_binary)
  output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
  # Look for this output:
  # cmd LC_UUID
  # cmdsize 24
  # uuid 4CA778FE-5BF9-3C45-AE59-7DF01B2BE83F
  if isinstance(output, str):
    output_str = output
  else:
    assert isinstance(output, bytes)
    output_str = output.decode()
  assert isinstance(output_str, str)
  lines = output_str.split('\n')
  uuid = None
  for index, line in enumerate(lines):
    stripped_line = line.strip()
    if not stripped_line.startswith('cmd LC_UUID'):
      continue
    if index + 2 >= len(lines):
      # Output ended before the `uuid` line of the load command.
      raise GetUUIDFromBinaryException('Malformed output: truncated LC_UUID load command')
    uuid_line = lines[index+2].strip()
    if not uuid_line.startswith('uuid'):
      raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line))
    split_uuid_line = uuid_line.split()
    uuid = split_uuid_line[1]
    break
  if uuid is None:
    logging.error('Failed to retrieve UUID from binary {}'.format(path_to_binary))
    logging.error('otool output was:\n{}'.format(output_str))
    raise GetUUIDFromBinaryException('Failed to retrieve UUID from binary "{}"'.format(path_to_binary))
  else:
    # Update cache
    _get_uuid_from_binary_cache[cache_key] = uuid
  return uuid

class ModuleMap(object):
  """Maps module names to their ModuleDesc."""
  def __init__(self):
    self._module_name_to_description_map = dict()

  def add_module(self, desc):
    """Register `desc`; its name must not be present already."""
    assert isinstance(desc, ModuleDesc)
    assert desc.name not in self._module_name_to_description_map
    self._module_name_to_description_map[desc.name] = desc

  def find_module_by_name(self, name):
    """Return the ModuleDesc registered under `name`, or None."""
    return self._module_name_to_description_map.get(name, None)

  def __str__(self):
    s = '{} modules:\n'.format(self.num_modules)
    for module_desc in sorted(self._module_name_to_description_map.values(), key=lambda v: v.start_addr):
      s += str(module_desc) + '\n'
    return s

  @property
  def num_modules(self):
    return len(self._module_name_to_description_map)

  @property
  def modules(self):
    return set(self._module_name_to_description_map.values())

  def get_module_path_for_symbolication(self, module_name, proxy, validate_uuid):
    """Return the path to symbolize `module_name` with, or None to skip it.

    None is returned when the module is unknown, a plug-in vetoes it, or
    (with `validate_uuid`) the UUID of the binary on disk cannot be read or
    differs from the one in the module map.
    """
    module_desc = self.find_module_by_name(module_name)
    if module_desc is None:
      return None
    # Allow a plug-in to change the module description to make it
    # suitable for symbolication or avoid symbolication altogether.
    module_desc = proxy.filter_module_desc(module_desc)
    if module_desc is None:
      return None
    if validate_uuid:
      logging.debug('Validating UUID of {}'.format(module_desc.module_path_for_symbolization))
      try:
        uuid = get_uuid_from_binary(module_desc.module_path_for_symbolization, arch = module_desc.arch)
        if uuid != module_desc.uuid:
          logging.warning("Detected UUID mismatch {} != {}".format(uuid, module_desc.uuid))
          # UUIDs don't match. Tell client to not symbolize this.
          return None
      except GetUUIDFromBinaryException as e:
        logging.error('Failed to get binary from UUID: %s', str(e))
        return None
    else:
      logging.warning('Skipping validation of UUID of {}'.format(module_desc.module_path_for_symbolization))
    return module_desc.module_path_for_symbolization

  @staticmethod
  def parse_from_file(module_map_path):
    """Parse the module map found in the text file `module_map_path`.

    Returns a ModuleMap, or None if the file has no `Process module map:`
    header. Raises Exception if the file is missing or a line between the
    header and `End of module map` cannot be parsed.
    """
    if not os.path.exists(module_map_path):
      raise Exception('module map "{}" does not exist'.format(module_map_path))
    with open(module_map_path, 'r') as f:
      mm = None
      # E.g.
      # 0x2db4000-0x102ddc000 /path/to (arm64) <0D6BBDE0-FF90-3680-899D-8E6F9528E04C>
      hex_regex = lambda name: r'0x(?P<' + name + r'>[0-9a-f]+)'
      module_path_regex = r'(?P<path>.+)'
      arch_regex = r'\((?P<arch>.+)\)'
      uuid_regex = r'<(?P<uuid>[0-9A-Z-]+)>'
      line_regex = r'^{}-{}\s+{}\s+{}\s+{}'.format(
        hex_regex('start_addr'),
        hex_regex('end_addr'),
        module_path_regex,
        arch_regex,
        uuid_regex
      )
      matcher = re.compile(line_regex)
      line_num = 0
      line = 'dummy'
      while line != '':
        line = f.readline()
        line_num += 1
        if mm is None:
          # Everything before the header is ignored.
          if line.startswith('Process module map:'):
            mm = ModuleMap()
          continue
        if line.startswith('End of module map'):
          break
        m_obj = matcher.match(line)
        if not m_obj:
          raise Exception('Failed to parse line {} "{}"'.format(line_num, line))
        arch = m_obj.group('arch')
        start_addr = int(m_obj.group('start_addr'), base=16)
        end_addr = int(m_obj.group('end_addr'), base=16)
        module_path = m_obj.group('path')
        uuid = m_obj.group('uuid')
        module_desc = ModuleDesc(
          name=os.path.basename(module_path),
          arch=arch,
          start_addr=start_addr,
          end_addr=end_addr,
          module_path=module_path,
          uuid=uuid
        )
        mm.add_module(module_desc)
      if mm is not None:
        logging.debug('Loaded Module map from "{}":\n{}'.format(
          f.name,
          str(mm))
        )
      return mm

class SysRootFilterPlugIn(AsanSymbolizerPlugIn):
  """
  Simple plug-in to add sys root prefix to all binary paths
  used for symbolication.
  """
  def __init__(self):
    self.sysroot_path = ""

  def register_cmdline_args(self, parser):
    parser.add_argument('-s', dest='sys_root', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')

  def process_cmdline_args(self, pargs):
    if pargs.sys_root is None:
      # Not being used so remove ourselves.
      return False
    self.sysroot_path = pargs.sys_root
    return True

  def filter_binary_path(self, path):
    return self.sysroot_path + path

class ModuleMapPlugIn(AsanSymbolizerPlugIn):
  """Plug-in that resolves binary paths through a module map file."""
  def __init__(self):
    self._module_map = None
    self._uuid_validation = True
  def register_cmdline_args(self, parser):
    parser.add_argument('--module-map',
                        help='Path to text file containing module map '
                        'output. See print_module_map ASan option.')
    parser.add_argument('--skip-uuid-validation',
                        default=False,
                        action='store_true',
                        help='Skips validating UUID of modules using otool.')

  def process_cmdline_args(self, pargs):
    if not pargs.module_map:
      return False
    # Use the namespace we were handed rather than a global `args`.
    self._module_map = ModuleMap.parse_from_file(pargs.module_map)
    if self._module_map is None:
      msg = 'Failed to find module map'
      logging.error(msg)
      raise Exception(msg)
    self._uuid_validation = not pargs.skip_uuid_validation
    return True

  def filter_binary_path(self, binary_path):
    if os.path.isabs(binary_path):
      # This is a binary path so transform into
      # a module name
      module_name = os.path.basename(binary_path)
    else:
      module_name = binary_path
    return self._module_map.get_module_path_for_symbolication(
      module_name,
      self.proxy,
      self._uuid_validation
    )

def add_logging_args(parser):
  """Add the `--log-dest` and `--log-level` options to `parser`."""
  parser.add_argument('--log-dest',
    default=None,
    help='Destination path for script logging (default stderr).',
  )
  parser.add_argument('--log-level',
    choices=['debug', 'info', 'warning', 'error', 'critical'],
    default='info',
    help='Log level for script (default: %(default)s).'
  )

def setup_logging():
  """Configure `logging` from the command line.

  Returns the command line arguments that were not consumed.
  """
  # Set up a parser just for parsing the logging arguments.
  # This is necessary because logging should be configured before we
  # perform the main argument parsing.
  parser = argparse.ArgumentParser(add_help=False)
  add_logging_args(parser)
  pargs, unparsed_args = parser.parse_known_args()

  log_level = getattr(logging, pargs.log_level.upper())
  if log_level == logging.DEBUG:
    log_format = '%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] %(message)s'
  else:
    log_format = '%(levelname)s: %(message)s'
  basic_config = {
    'level': log_level,
    'format': log_format
  }
  log_dest = pargs.log_dest
  if log_dest:
    basic_config['filename'] = log_dest
  logging.basicConfig(**basic_config)
  logging.debug('Logging level set to "{}" and directing output to "{}"'.format(
    pargs.log_level,
    'stderr' if log_dest is None else log_dest)
  )
  return unparsed_args

def add_load_plugin_args(parser):
  """Add the `-p/--plugins` option to `parser`."""
  parser.add_argument('-p', '--plugins',
    help='Load plug-in', nargs='+', default=[])

def setup_plugins(plugin_proxy, args):
  parser = argparse.ArgumentParser(add_help=False)
  add_load_plugin_args(parser)
  pargs , unparsed_args = parser.parse_known_args()
  for plugin_path in pargs.plugins:
    plugin_proxy.load_plugin_from_file(plugin_path)
  # Add built-in plugins.
1021 plugin_proxy.add_plugin(ModuleMapPlugIn()) 1022 plugin_proxy.add_plugin(SysRootFilterPlugIn()) 1023 return unparsed_args 1024 1025if __name__ == '__main__': 1026 remaining_args = setup_logging() 1027 with AsanSymbolizerPlugInProxy() as plugin_proxy: 1028 remaining_args = setup_plugins(plugin_proxy, remaining_args) 1029 parser = argparse.ArgumentParser( 1030 formatter_class=argparse.RawDescriptionHelpFormatter, 1031 description='ASan symbolization script', 1032 epilog=__doc__) 1033 parser.add_argument('path_to_cut', nargs='*', 1034 help='pattern to be cut from the result file path ') 1035 parser.add_argument('-d','--demangle', action='store_true', 1036 help='demangle function names') 1037 parser.add_argument('-c', metavar='CROSS_COMPILE', 1038 help='set prefix for binutils') 1039 parser.add_argument('-l','--logfile', default=sys.stdin, 1040 type=argparse.FileType('r'), 1041 help='set log file name to parse, default is stdin') 1042 parser.add_argument('--force-system-symbolizer', action='store_true', 1043 help='don\'t use llvm-symbolizer') 1044 # Add logging arguments so that `--help` shows them. 1045 add_logging_args(parser) 1046 # Add load plugin arguments so that `--help` shows them. 1047 add_load_plugin_args(parser) 1048 plugin_proxy.register_cmdline_args(parser) 1049 args = parser.parse_args(remaining_args) 1050 plugin_proxy.process_cmdline_args(args) 1051 if args.path_to_cut: 1052 fix_filename_patterns = args.path_to_cut 1053 if args.demangle: 1054 demangle = True 1055 if args.c: 1056 binutils_prefix = args.c 1057 if args.logfile: 1058 logfile = args.logfile 1059 else: 1060 logfile = sys.stdin 1061 if args.force_system_symbolizer: 1062 force_system_symbolizer = True 1063 if force_system_symbolizer: 1064 assert(allow_system_symbolizer) 1065 loop = SymbolizationLoop(plugin_proxy) 1066 loop.process_logfile() 1067