#!/usr/bin/env python

#----------------------------------------------------------------------
# Be sure to add the python path that points to the LLDB shared library.
#
# To use this in the embedded python interpreter using "lldb":
#
#   cd /path/containing/crashlog.py
#   lldb
#   (lldb) script import crashlog
#   "crashlog" command installed, type "crashlog --help" for detailed help
#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
#
# The benefit of running the crashlog command inside lldb in the
# embedded python interpreter is when the command completes, there
# will be a target with all of the files loaded at the locations
# described in the crash log. Only the files that have stack frames
# in the backtrace will be loaded unless the "--load-all" option
# has been specified. This allows users to explore the program in the
# state it was in right at crash time.
#
# On MacOSX csh, tcsh:
#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
#
# On MacOSX sh, bash:
#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
#----------------------------------------------------------------------

import lldb
import optparse
import os
import plistlib
import re
import shlex
import sys
import time
import uuid


class Address:
    """Class that represents an address that will be symbolicated"""

    def __init__(self, target, load_addr):
        self.target = target
        self.load_addr = load_addr  # The load address that this object represents
        # the resolved lldb.SBAddress (if any), named so_addr for
        # section/offset address
        self.so_addr = None
        self.sym_ctx = None  # The cached symbol context for this address
        # Any original textual description of this address to be used as a
        # backup in case symbolication fails
        self.description = None
        # The cached symbolicated string that describes this address
        self.symbolication = None
        self.inlined = False

    def __str__(self):
        """Return the load address followed by the best available text:
        the symbolication, else the description, else the resolved address."""
        s = "%#16.16x" % (self.load_addr)
        if self.symbolication:
            s += " %s" % (self.symbolication)
        elif self.description:
            s += " %s" % (self.description)
        elif self.so_addr:
            s += " %s" % (self.so_addr)
        return s

    def resolve_addr(self):
        """Resolve (and cache) the section/offset address for load_addr."""
        if self.so_addr is None:
            self.so_addr = self.target.ResolveLoadAddress(self.load_addr)
        return self.so_addr

    def is_inlined(self):
        """Return True if the last symbolication found an inlined block."""
        return self.inlined

    def get_symbol_context(self):
        """Return (and cache) the symbol context for this address.

        An empty lldb.SBSymbolContext is cached when the address cannot be
        resolved.
        """
        if self.sym_ctx is None:
            sb_addr = self.resolve_addr()
            if sb_addr:
                self.sym_ctx = self.target.ResolveSymbolContextForAddress(
                    sb_addr, lldb.eSymbolContextEverything)
            else:
                self.sym_ctx = lldb.SBSymbolContext()
        return self.sym_ctx

    def get_instructions(self):
        """Return the instructions of the function (preferred) or symbol
        that contains this address, or None without a valid symbol context."""
        sym_ctx = self.get_symbol_context()
        if sym_ctx:
            function = sym_ctx.GetFunction()
            if function:
                return function.GetInstructions(self.target)
            return sym_ctx.GetSymbol().GetInstructions(self.target)
        return None

    def symbolicate(self, verbose=False):
        """Build and cache the symbolicated description of this address.

        verbose -- when True, full module and source file paths are used
                   instead of just the file names.

        Returns True if a function or symbol was found for the address,
        False otherwise. The text is cached in self.symbolication.
        """
        if self.symbolication is not None:
            # Already symbolicated: report the cached outcome. A failed
            # attempt leaves an empty string, a successful one always
            # contains at least the function or symbol name. (Previously a
            # second call returned False even after a success.)
            return bool(self.symbolication)

        self.symbolication = ''
        self.inlined = False
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return False

        module = sym_ctx.GetModule()
        if module:
            # Print full source file path in verbose mode
            if verbose:
                self.symbolication += str(module.GetFileSpec()) + '`'
            else:
                self.symbolication += module.GetFileSpec().GetFilename() + '`'

        function_start_load_addr = -1
        function = sym_ctx.GetFunction()
        block = sym_ctx.GetBlock()
        line_entry = sym_ctx.GetLineEntry()
        symbol = sym_ctx.GetSymbol()
        inlined_block = block.GetContainingInlinedBlock()
        if function:
            self.symbolication += function.GetName()

            if inlined_block:
                self.inlined = True
                self.symbolication += ' [inlined] ' + \
                    inlined_block.GetInlinedName()
                block_range_idx = inlined_block.GetRangeIndexForBlockAddress(
                    self.so_addr)
                if block_range_idx < lldb.UINT32_MAX:
                    # Offsets are reported relative to the start of the
                    # inlined range that contains the address.
                    block_range_start_addr = inlined_block.GetRangeStartAddress(
                        block_range_idx)
                    function_start_load_addr = block_range_start_addr.GetLoadAddress(
                        self.target)
            if function_start_load_addr == -1:
                function_start_load_addr = function.GetStartAddress().GetLoadAddress(self.target)
        elif symbol:
            self.symbolication += symbol.GetName()
            function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(self.target)
        else:
            self.symbolication = ''
            return False

        # Dump the offset from the current function or symbol if it
        # is non zero
        function_offset = self.load_addr - function_start_load_addr
        if function_offset > 0:
            self.symbolication += " + %u" % (function_offset)
        elif function_offset < 0:
            self.symbolication += " %i (invalid negative offset, file a bug) " % function_offset

        # Print out any line information if any is available
        if line_entry.GetFileSpec():
            # Print full source file path in verbose mode
            if verbose:
                self.symbolication += ' at %s' % line_entry.GetFileSpec()
            else:
                self.symbolication += ' at %s' % line_entry.GetFileSpec().GetFilename()
            self.symbolication += ':%u' % line_entry.GetLine()
            column = line_entry.GetColumn()
            if column > 0:
                self.symbolication += ':%u' % column
        return True


class Section:
    """Class that represents an load address range"""
    # Raw strings so that "\s" is a regex escape rather than an (invalid)
    # Python string escape.
    sect_info_regex = re.compile(r'(?P<name>[^=]+)=(?P<range>.*)')
    addr_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$')
    range_regex = re.compile(
        r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$')

    def __init__(self, start_addr=None, end_addr=None, name=None):
        self.start_addr = start_addr
        self.end_addr = end_addr
        self.name = name

    @classmethod
    def InitWithSBTargetAndSBSection(cls, target, section):
        """Create a Section from an lldb section loaded in "target".

        Returns None if the section has no valid load address.
        """
        sect_load_addr = section.GetLoadAddress(target)
        if sect_load_addr == lldb.LLDB_INVALID_ADDRESS:
            return None
        return cls(
            sect_load_addr,
            sect_load_addr + section.size,
            section.name)

    def contains(self, addr):
        """Return True if addr lies in the half-open range [start, end).

        A section without a known start or end address (for example one
        parsed from "<name>=<base>") contains no address.
        """
        if self.start_addr is None or self.end_addr is None:
            return False
        return self.start_addr <= addr < self.end_addr

    def set_from_string(self, s):
        """Parse "<name>=<base>", "<name>=<base>-<end>" or
        "<name>=<base>+<size>" into this object.

        Returns True on success. On failure a usage message is printed and
        False is returned.
        """
        match = self.sect_info_regex.match(s)
        if match:
            self.name = match.group('name')
            range_str = match.group('range')
            addr_match = self.addr_regex.match(range_str)
            if addr_match:
                self.start_addr = int(addr_match.group('start'), 16)
                self.end_addr = None
                return True

            range_match = self.range_regex.match(range_str)
            if range_match:
                self.start_addr = int(range_match.group('start'), 16)
                self.end_addr = int(range_match.group('end'), 16)
                op = range_match.group('op')
                if op == '+':
                    # "<base>+<size>": the second number is a size
                    self.end_addr += self.start_addr
                return True
        print('error: invalid section info string "%s"' % s)
        print('Valid section info formats are:')
        print('Format Example Description')
        print('--------------------- -----------------------------------------------')
        print('<name>=<base> __TEXT=0x123000 Section from base address only')
        print('<name>=<base>-<end> __TEXT=0x123000-0x124000 Section from base address and end address')
        print('<name>=<base>+<size> __TEXT=0x123000+0x1000 Section from base address and size')
        return False

    def __str__(self):
        if self.name:
            if self.end_addr is not None:
                if self.start_addr is not None:
                    return "%s=[0x%16.16x - 0x%16.16x)" % (
                        self.name, self.start_addr, self.end_addr)
            else:
                if self.start_addr is not None:
                    return "%s=0x%16.16x" % (self.name, self.start_addr)
            return self.name
        return "<invalid>"


class Image:
    """A class that represents an executable image and any associated data"""

    def __init__(self, path, uuid=None):
        self.path = path
        self.resolved_path = None
        self.resolved = False
        self.unavailable = False
        self.uuid = uuid
        self.section_infos = list()
        self.identifier = None
        self.version = None
        self.arch = None
        self.module = None
        self.symfile = None
        self.slide = None

    @classmethod
    def InitWithSBTargetAndSBModule(cls, target, module):
        '''Initialize this Image object with a module from a target.'''
        obj = cls(module.file.fullpath, module.uuid)
        obj.resolved_path = module.platform_file.fullpath
        obj.resolved = True
        for section in module.sections:
            symb_section = Section.InitWithSBTargetAndSBSection(
                target, section)
            if symb_section:
                obj.section_infos.append(symb_section)
        obj.arch = module.triple
        obj.module = module
        obj.symfile = None
        obj.slide = None
        return obj

    def dump(self, prefix):
        """Print this image's string form preceded by "prefix"."""
        print("%s%s" % (prefix, self))

    def debug_dump(self):
        """Print every attribute of this image, one per line."""
        print('path = "%s"' % (self.path))
        print('resolved_path = "%s"' % (self.resolved_path))
        print('resolved = %i' % (self.resolved))
        print('unavailable = %i' % (self.unavailable))
        print('uuid = %s' % (self.uuid))
        print('section_infos = %s' % (self.section_infos))
        print('identifier = "%s"' % (self.identifier))
        print('version = %s' % (self.version))
        print('arch = %s' % (self.arch))
        print('module = %s' % (self.module))
        print('symfile = "%s"' % (self.symfile))
        # slide defaults to None, which "%i" cannot format
        if self.slide is None:
            print('slide = None')
        else:
            print('slide = %i (0x%x)' % (self.slide, self.slide))

    def __str__(self):
        s = ''
        if self.uuid:
            s += "%s " % (self.get_uuid())
        if self.arch:
            s += "%s " % (self.arch)
        if self.version:
            s += "%s " % (self.version)
        resolved_path = self.get_resolved_path()
        if resolved_path:
            s += "%s " % (resolved_path)
        for section_info in self.section_infos:
            s += ", %s" % (section_info)
        if self.slide is not None:
            s += ', slide = 0x%16.16x' % self.slide
        return s

    def add_section(self, section):
        """Append a Section describing where part of this image is loaded."""
        self.section_infos.append(section)

    def get_section_containing_load_addr(self, load_addr):
        """Return the first section info containing load_addr, or None."""
        for section_info in self.section_infos:
            if section_info.contains(load_addr):
                return section_info
        return None

    def get_resolved_path(self):
        """Return resolved_path if set, else path, else None."""
        if self.resolved_path:
            return self.resolved_path
        elif self.path:
            return self.path
        return None

    def get_resolved_path_basename(self):
        path = self.get_resolved_path()
        if path:
            return os.path.basename(path)
        return None

    def symfile_basename(self):
        if self.symfile:
            return os.path.basename(self.symfile)
        return None

    def has_section_load_info(self):
        """Return a true value if section infos or a slide are available."""
        return self.section_infos or self.slide is not None

    def load_module(self, target):
        """Set the load addresses of this image's module in "target".

        The section infos are used when present, otherwise the whole
        module is loaded at self.slide.

        Returns None on success (or if the image is already known to be
        unavailable) and an error string otherwise.
        """
        if self.unavailable:
            return None  # We already warned that we couldn't find this module, so don't return an error string
        if not self.has_section_load_info():
            return 'error: no section infos'
        if not target:
            return 'error: invalid target'
        if not self.module:
            return 'error: invalid module'

        if self.section_infos:
            num_sections_loaded = 0
            for section_info in self.section_infos:
                if not section_info.name:
                    # The original code formatted "range.name" here, i.e.
                    # an attribute of the builtin "range", which raised
                    # AttributeError instead of returning this error.
                    return 'error: unable to find "%s" section in "%s"' % (
                        section_info.name, self.get_resolved_path())
                section = self.module.FindSection(section_info.name)
                if not section:
                    return 'error: unable to find the section named "%s"' % section_info.name
                error = target.SetSectionLoadAddress(
                    section, section_info.start_addr)
                if not error.Success():
                    return 'error: %s' % error.GetCString()
                num_sections_loaded += 1
            if num_sections_loaded == 0:
                return 'error: no sections were successfully loaded'
        else:
            err = target.SetModuleLoadAddress(self.module, self.slide)
            if err.Fail():
                return err.GetCString()
        return None

    def add_module(self, target):
        '''Add the Image described in this object to "target" and load its
        sections if any section load info is available.

        Returns None on success (or if the image is unavailable) and an
        error string otherwise.'''
        if not target:
            return 'error: invalid target'
        # Try and find using UUID only first so that paths need not match
        # up
        uuid_str = self.get_normalized_uuid_string()
        if uuid_str:
            self.module = target.AddModule(None, None, uuid_str)
        if not self.module:
            self.locate_module_and_debug_symbols()
            if self.unavailable:
                return None
            resolved_path = self.get_resolved_path()
            self.module = target.AddModule(
                resolved_path, None, uuid_str, self.symfile)
        if not self.module:
            return 'error: unable to get module for (%s) "%s"' % (
                self.arch, self.get_resolved_path())
        if self.has_section_load_info():
            return self.load_module(target)
        return None  # No sections, the module was added to the target, so success

    def locate_module_and_debug_symbols(self):
        # By default, just use the paths that were supplied in:
        # self.path
        # self.resolved_path
        # self.module
        # self.symfile
        # Subclasses can inherit from this class and override this function
        self.resolved = True
        return True

    def get_uuid(self):
        """Return the UUID, reading it from the module if not yet known."""
        if not self.uuid and self.module:
            self.uuid = uuid.UUID(self.module.GetUUIDString())
        return self.uuid

    def get_normalized_uuid_string(self):
        """Return the UUID as an upper case string, or None if unknown."""
        if self.uuid:
            return str(self.uuid).upper()
        return None

    def create_target(self, debugger):
        '''Create a target using the information in this Image object.

        Returns the new target, or None if the image is unavailable, cannot
        be located, or the target cannot be created.'''
        if self.unavailable:
            return None

        if self.locate_module_and_debug_symbols():
            resolved_path = self.get_resolved_path()
            path_spec = lldb.SBFileSpec(resolved_path)
            error = lldb.SBError()
            target = debugger.CreateTarget(
                resolved_path, self.arch, None, False, error)
            if target:
                self.module = target.FindModule(path_spec)
                if self.has_section_load_info():
                    err = self.load_module(target)
                    if err:
                        print('ERROR: ', err)
                return target
            else:
                print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path))
        else:
            print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path))
        return None


class Symbolicator:

    def __init__(self, debugger=None, target=None, images=None):
        """A class the represents the information needed to symbolicate
        addresses in a program.

        Do not call this initializer directly, but rather use the factory
        methods.
        """
        self.debugger = debugger
        self.target = target
        # a list of images to be used when symbolicating. A fresh list is
        # created per instance: a list() default argument would be shared
        # by every Symbolicator created without an explicit "images".
        self.images = list() if images is None else images
        self.addr_mask = 0xffffffffffffffff

    @classmethod
    def InitWithSBTarget(cls, target):
        """Initialize a new Symbolicator with an existing SBTarget."""
        obj = cls(target=target)
        triple = target.triple
        if triple:
            arch = triple.split('-')[0]
            if "arm" in arch:
                obj.addr_mask = 0xfffffffffffffffe

        for module in target.modules:
            image = Image.InitWithSBTargetAndSBModule(target, module)
            obj.images.append(image)
        return obj

    @classmethod
    def InitWithSBDebugger(cls, debugger, images):
        """Initialize a new Symbolicator with an existing debugger and list of
        images. The Symbolicator will create the target."""
        obj = cls(debugger=debugger, images=images)
        return obj

    def __str__(self):
        s = "Symbolicator:\n"
        if self.target:
            s += "Target = '%s'\n" % (self.target)
            s += "Target modules:\n"
            for m in self.target.modules:
                s += str(m) + "\n"
        s += "Images:\n"
        for image in self.images:
            s += ' %s\n' % (image)
        return s

    def find_images_with_identifier(self, identifier):
        """Return the images whose identifier equals "identifier".

        If none match exactly, return the images whose identifier ends with
        "." followed by "identifier". Images without an identifier are
        skipped in that fallback.
        """
        images = [
            image for image in self.images if image.identifier == identifier]
        if not images:
            regex = re.compile(r'^.*\.%s$' % (re.escape(identifier)))
            for image in self.images:
                # identifier defaults to None, which regex.match() rejects
                if image.identifier and regex.match(image.identifier):
                    images.append(image)
        return images

    def find_image_containing_load_addr(self, load_addr):
        """Return the first image with a section containing load_addr."""
        for image in self.images:
            if image.get_section_containing_load_addr(load_addr):
                return image
        return None

    def create_target(self):
        """Return the existing target, or create one from the first image
        that yields a valid target. Returns None if no target is available."""
        if self.target:
            return self.target

        if self.images:
            for image in self.images:
                self.target = image.create_target(self.debugger)
                if self.target:
                    if self.target.GetAddressByteSize() == 4:
                        triple = self.target.triple
                        if triple:
                            arch = triple.split('-')[0]
                            if "arm" in arch:
                                self.addr_mask = 0xfffffffffffffffe
                    return self.target
        return None

    def symbolicate(self, load_addr, verbose=False):
        """Symbolicate load_addr.

        Returns a list of Address objects, the first for load_addr itself
        followed by one per parent inlined scope, or None if there is no
        target or the address could not be symbolicated.
        """
        if not self.target:
            self.create_target()
        if not self.target:
            print('error: no target in Symbolicator')
            return None

        live_process = False
        process = self.target.process
        if process:
            state = process.state
            if state > lldb.eStateUnloaded and state < lldb.eStateDetached:
                live_process = True
        # If we don't have a live process, we can attempt to find the image
        # that a load address belongs to and lazily load its module in the
        # target, but we shouldn't do any of this if we have a live process
        if not live_process:
            image = self.find_image_containing_load_addr(load_addr)
            if image:
                image.add_module(self.target)

        symbolicated_address = Address(self.target, load_addr)
        if not symbolicated_address.symbolicate(verbose):
            return None
        if not symbolicated_address.so_addr:
            return None

        symbolicated_addresses = [symbolicated_address]
        # See if we were able to reconstruct anything?
        while True:
            inlined_parent_so_addr = lldb.SBAddress()
            inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope(
                symbolicated_address.so_addr, inlined_parent_so_addr)
            if not inlined_parent_sym_ctx:
                break
            if not inlined_parent_so_addr:
                break

            symbolicated_address = Address(
                self.target, inlined_parent_so_addr.GetLoadAddress(
                    self.target))
            symbolicated_address.sym_ctx = inlined_parent_sym_ctx
            symbolicated_address.so_addr = inlined_parent_so_addr
            symbolicated_address.symbolicate(verbose)

            # push the new frame onto the new frame stack
            symbolicated_addresses.append(symbolicated_address)

        return symbolicated_addresses


def disassemble_instructions(
        target,
        instructions,
        pc,
        insts_before_pc,
        insts_after_pc,
        non_zeroeth_frame):
    """Print the disassembly around "pc", marking the current instruction.

    insts_before_pc / insts_after_pc give the number of instructions to
    show on each side of the marked one; -1 means "all of them" on that
    side. Nothing is printed if no instruction starts at "pc".
    """
    lines = list()
    pc_index = -1
    comment_column = 50
    for inst_idx, inst in enumerate(instructions):
        inst_pc = inst.GetAddress().GetLoadAddress(target)
        if pc == inst_pc:
            pc_index = inst_idx
        mnemonic = inst.GetMnemonic(target)
        operands = inst.GetOperands(target)
        comment = inst.GetComment(target)
        lines.append("%#16.16x: %8s %s" % (inst_pc, mnemonic, operands))
        if comment:
            line_len = len(lines[-1])
            if line_len < comment_column:
                lines[-1] += ' ' * (comment_column - line_len)
                lines[-1] += "; %s" % comment

    if pc_index >= 0:
        last_idx = len(lines) - 1
        # If we are disassembling the non-zeroeth frame, we need to backup the
        # PC by 1
        if non_zeroeth_frame and pc_index > 0:
            pc_index = pc_index - 1
        if insts_before_pc == -1:
            start_idx = 0
        else:
            start_idx = max(pc_index - insts_before_pc, 0)
        # The end of the window is controlled by insts_after_pc (the
        # original code tested insts_before_pc here by mistake).
        if insts_after_pc == -1:
            end_idx = last_idx
        else:
            end_idx = min(pc_index + insts_after_pc, last_idx)
        for i in range(start_idx, end_idx + 1):
            if i == pc_index:
                print(' -> ', lines[i])
            else:
                print(' ', lines[i])


def print_module_section_data(section):
    """Print a section followed by a description of its data, if any."""
    print(section)
    section_data = section.GetSectionData()
    if section_data:
        ostream = lldb.SBStream()
        section_data.GetDescription(ostream, section.GetFileAddress())
        print(ostream.GetData())


def print_module_section(section, depth):
    """Print a section and its sub sections down to "depth" levels."""
    print(section)
    if depth > 0:
        num_sub_sections = section.GetNumSubSections()
        for sect_idx in range(num_sub_sections):
            print_module_section(
                section.GetSubSectionAtIndex(sect_idx), depth - 1)


def print_module_sections(module, depth):
    """Print every section of "module" down to "depth" levels."""
    for sect in module.section_iter():
        print_module_section(sect, depth)


def print_module_symbols(module):
    """Print every symbol obtained by iterating "module"."""
    for sym in module:
        print(sym)


def Symbolicate(debugger, command_args):
    """Parse command_args and print the symbolication of each address.

    debugger     -- the lldb.SBDebugger used to create the target
    command_args -- list of option and address strings (see --help)
    """
    usage = "usage: %prog [options] <addr1> [addr2 ...]"
    description = '''Symbolicate one or more addresses using LLDB's python scripting API..'''
    parser = optparse.OptionParser(
        description=description,
        prog='crashlog.py',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    parser.add_option(
        '-p',
        '--platform',
        type='string',
        metavar='platform',
        dest='platform',
        help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".')
    parser.add_option(
        '-f',
        '--file',
        type='string',
        metavar='file',
        dest='file',
        help='Specify a file to use when symbolicating')
    parser.add_option(
        '-a',
        '--arch',
        type='string',
        metavar='arch',
        dest='arch',
        help='Specify a architecture to use when symbolicating')
    parser.add_option(
        '-s',
        '--slide',
        type='int',
        metavar='slide',
        dest='slide',
        help='Specify the slide to use on the file specified with the --file option',
        default=None)
    parser.add_option(
        '--section',
        type='string',
        action='append',
        dest='section_strings',
        help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>')
    try:
        (options, args) = parser.parse_args(command_args)
    except SystemExit:
        # optparse reports bad options and --help by exiting; swallow that
        # so the hosting interpreter keeps running.
        return
    symbolicator = Symbolicator(debugger)
    if options.file:
        image = Image(options.file)
        image.arch = options.arch
        # Add any sections that were specified with one or more --section
        # options
        if options.section_strings:
            for section_str in options.section_strings:
                section = Section()
                if section.set_from_string(section_str):
                    image.add_section(section)
                else:
                    sys.exit(1)
        if options.slide is not None:
            image.slide = options.slide
        symbolicator.images.append(image)

    target = symbolicator.create_target()
    if options.verbose:
        print(symbolicator)
    if target:
        for addr_str in args:
            try:
                # base 0: accept decimal, 0x..., 0o... and 0b... literals
                addr = int(addr_str, 0)
            except ValueError:
                print('error: invalid address "%s"' % addr_str)
                continue
            symbolicated_addrs = symbolicator.symbolicate(
                addr, options.verbose)
            # symbolicate() returns None when nothing could be resolved
            if symbolicated_addrs:
                for symbolicated_addr in symbolicated_addrs:
                    print(symbolicated_addr)
            else:
                print('error: unable to symbolicate %#x' % addr)
            print()
    else:
        print('error: no target for %s' % (symbolicator))


if __name__ == '__main__':
    # Create a new debugger instance
    debugger = lldb.SBDebugger.Create()
    Symbolicate(debugger, sys.argv[1:])
    lldb.SBDebugger.Destroy(debugger)