1// -*- mode: c++ -*- 2 3// Copyright (c) 2011, Google Inc. 4// All rights reserved. 5// 6// Redistribution and use in source and binary forms, with or without 7// modification, are permitted provided that the following conditions are 8// met: 9// 10// * Redistributions of source code must retain the above copyright 11// notice, this list of conditions and the following disclaimer. 12// * Redistributions in binary form must reproduce the above 13// copyright notice, this list of conditions and the following disclaimer 14// in the documentation and/or other materials provided with the 15// distribution. 16// * Neither the name of Google Inc. nor the names of its 17// contributors may be used to endorse or promote products derived from 18// this software without specific prior written permission. 19// 20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// dump_syms.mm: Create a symbol file for use with minidumps

#include "common/mac/dump_syms.h"

#include <Foundation/Foundation.h>
#include <assert.h>
#include <mach-o/arch.h>
#include <mach-o/fat.h>
#include <stdio.h>
#include <string.h>

#include <ostream>
#include <string>
#include <vector>

#include "common/dwarf/bytereader-inl.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/dwarf_cfi_to_module.h"
#include "common/dwarf_cu_to_module.h"
#include "common/dwarf_line_to_module.h"
#include "common/mac/file_id.h"
#include "common/mac/arch_utilities.h"
#include "common/mac/macho_reader.h"
#include "common/module.h"
#include "common/scoped_ptr.h"
#include "common/stabs_reader.h"
#include "common/stabs_to_module.h"
#include "common/symbol_data.h"

#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
#endif  // CPU_TYPE_ARM

using dwarf2reader::ByteReader;
using google_breakpad::DwarfCUToModule;
using google_breakpad::DwarfLineToModule;
using google_breakpad::FileID;
using google_breakpad::mach_o::FatReader;
using google_breakpad::mach_o::Section;
using google_breakpad::mach_o::Segment;
using google_breakpad::Module;
using google_breakpad::StabsReader;
using google_breakpad::StabsToModule;
using google_breakpad::scoped_ptr;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;

namespace google_breakpad {

// Load the object file named by FILENAME into memory and record the list
// of architectures it contains.
//
// If FILENAME names a bundle, the file actually read is the resource found
// in the bundle's "DWARF" directory; otherwise FILENAME itself is read. On
// success, contents_ holds the file's bytes and object_files_ holds a copy
// of the fat_arch entries reported by FatReader. Returns false, after
// printing a message to stderr where this function detects the problem
// itself, if the file does not exist, cannot be read, cannot be parsed, or
// contains no architectures.
bool DumpSymbols::Read(NSString *filename) {
  if (![[NSFileManager defaultManager] fileExistsAtPath:filename]) {
    fprintf(stderr, "Object file does not exist: %s\n",
            [filename fileSystemRepresentation]);
    return false;
  }

  input_pathname_ = [filename retain];

  // Does this filename refer to a dSYM bundle?
  NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_];

  if (bundle) {
    // Filenames referring to bundles usually have names of the form
    // "<basename>.dSYM"; however, if the user has specified a wrapper
    // suffix (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build settings),
    // then the name may have the form "<basename>.<extension>.dSYM". In
    // either case, the resource name for the file containing the DWARF
    // info within the bundle is <basename>.
    //
    // Since there's no way to tell how much to strip off, remove one
    // extension at a time, and use the first one that
    // pathForResource:ofType:inDirectory likes.
    NSString *base_name = [input_pathname_ lastPathComponent];
    NSString *dwarf_resource;

    do {
      NSString *new_base_name = [base_name stringByDeletingPathExtension];

      // If stringByDeletingPathExtension returned the name unchanged, then
      // there's nothing more for us to strip off --- lose.
      if ([new_base_name isEqualToString:base_name]) {
        fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
                [input_pathname_ fileSystemRepresentation]);
        return false;
      }

      // Take the shortened result as our new base_name.
      base_name = new_base_name;

      // Try to find a DWARF resource in the bundle under the new base_name.
      dwarf_resource = [bundle pathForResource:base_name
                                        ofType:nil inDirectory:@"DWARF"];
    } while (!dwarf_resource);

    object_filename_ = [dwarf_resource retain];
  } else {
    object_filename_ = [input_pathname_ retain];
  }

  // Read the file's contents into memory.
  //
  // The documentation for dataWithContentsOfMappedFile says:
  //
  //     Because of file mapping restrictions, this method should only be
  //     used if the file is guaranteed to exist for the duration of the
  //     data object’s existence. It is generally safer to use the
  //     dataWithContentsOfFile: method.
  //
  // I gather this means that OS X doesn't have (or at least, that method
  // doesn't use) a form of mapping like Linux's MAP_PRIVATE, where the
  // process appears to get its own copy of the data, and changes to the
  // file don't affect memory and vice versa).
  //
  // Initialize the error to nil so that the failure path below never
  // messages an indeterminate pointer.
  NSError *error = nil;
  contents_ = [NSData dataWithContentsOfFile:object_filename_
                                     options:0
                                       error:&error];
  if (!contents_) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            [object_filename_ fileSystemRepresentation],
            [[error localizedDescription] UTF8String]);
    return false;
  }
  [contents_ retain];

  // Get the list of object files present in the file.
  FatReader::Reporter fat_reporter([object_filename_
                                    fileSystemRepresentation]);
  FatReader fat_reader(&fat_reporter);
  if (!fat_reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]),
                       [contents_ length])) {
    return false;
  }

  // Get our own copy of fat_reader's object file list.
  size_t object_files_count;
  const struct fat_arch *object_files =
      fat_reader.object_files(&object_files_count);
  if (object_files_count == 0) {
    fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
            [object_filename_ fileSystemRepresentation]);
    return false;
  }
  object_files_.resize(object_files_count);
  memcpy(&object_files_[0], object_files,
         sizeof(struct fat_arch) * object_files_count);

  return true;
}

// Select the object file in object_files_ that NXFindBestFatArch considers
// the best match for CPU_TYPE and CPU_SUBTYPE. Returns true and records the
// selection in selected_object_file_ on success; returns false if there is
// no match or if no object files have been loaded.
bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type,
                                  cpu_subtype_t cpu_subtype) {
  // Taking the address of element zero of an empty vector is undefined, so
  // bail out before touching it.
  if (object_files_.empty())
    return false;

  // Find the best match for the architecture the user requested.
  const struct fat_arch *best_match
    = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0],
                        static_cast<uint32_t>(object_files_.size()));
  if (!best_match) return false;

  // Record the selected object file.
  selected_object_file_ = best_match;
  return true;
}

// Select an architecture by name. ARCH_NAME is looked up with
// BreakpadGetArchInfoFromName; if that lookup fails, or the resulting
// cpu type/subtype matches none of the loaded object files, return false.
bool DumpSymbols::SetArchitecture(const std::string &arch_name) {
  bool arch_set = false;
  const NXArchInfo *arch_info =
      google_breakpad::BreakpadGetArchInfoFromName(arch_name.c_str());
  if (arch_info) {
    arch_set = SetArchitecture(arch_info->cputype, arch_info->cpusubtype);
  }
  return arch_set;
}

// Return the identifier that FileID computes for the selected architecture
// of the object file, converted to a string with every '-' removed.
// Returns the empty string, after printing a message to stderr, if the
// identifier cannot be computed. selected_object_file_ must already be set.
string DumpSymbols::Identifier() {
  FileID file_id([object_filename_ fileSystemRepresentation]);
  unsigned char identifier_bytes[16];
  cpu_type_t cpu_type = selected_object_file_->cputype;
  cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
  if (!file_id.MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            [object_filename_ fileSystemRepresentation]);
    return "";
  }

  char identifier_string[40];
  FileID::ConvertIdentifierToString(identifier_bytes, identifier_string,
                                    sizeof(identifier_string));

  // Strip the dashes. After erasing the dash at index i, the next search
  // resumes at i, which now holds the character that followed the dash.
  string compacted(identifier_string);
  for (size_t i = compacted.find('-'); i != string::npos;
       i = compacted.find('-', i))
    compacted.erase(i, 1);

  return compacted;
}

// A line-to-module loader that accepts line number info parsed by
// dwarf2reader::LineInfo and populates a Module and a line vector
// with the results.
class DumpSymbols::DumperLineToModule:
      public DwarfCUToModule::LineToModuleHandler {
 public:
  // Create a line-to-module converter using BYTE_READER. The reader is not
  // owned by this object.
  explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
      : byte_reader_(byte_reader) { }

  // Remember COMPILATION_DIR for use by subsequent ReadProgram calls.
  void StartCompilationUnit(const string& compilation_dir) {
    compilation_dir_ = compilation_dir;
  }

  // Parse the LENGTH bytes of line number program at PROGRAM, feeding the
  // results to a DwarfLineToModule built from MODULE, the current
  // compilation directory, and LINES.
  void ReadProgram(const char *program, uint64 length,
                   Module *module, vector<Module::Line> *lines) {
    DwarfLineToModule handler(module, compilation_dir_, lines);
    dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
    parser.Start();
  }
 private:
  string compilation_dir_;
  dwarf2reader::ByteReader *byte_reader_;  // WEAK
};

// Read the DWARF debugging information in DWARF_SECTIONS and add it to
// MODULE. MACHO_READER supplies the file's endianness, and
// HANDLE_INTER_CU_REFS is passed through to the DwarfCUToModule file
// context. Returns false, after printing a message to stderr, if there is
// no usable __debug_info section; returns true otherwise.
bool DumpSymbols::ReadDwarf(google_breakpad::Module *module,
                            const mach_o::Reader &macho_reader,
                            const mach_o::SectionMap &dwarf_sections,
                            bool handle_inter_cu_refs) const {
  // Build a byte reader of the appropriate endianness.
  ByteReader byte_reader(macho_reader.big_endian()
                         ? dwarf2reader::ENDIANNESS_BIG
                         : dwarf2reader::ENDIANNESS_LITTLE);

  // Construct a context for this file.
  DwarfCUToModule::FileContext file_context(selected_object_name_,
                                            module,
                                            handle_inter_cu_refs);

  // Build a dwarf2reader::SectionMap from our mach_o::SectionMap.
  for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin();
       it != dwarf_sections.end(); ++it) {
    file_context.AddSectionToSectionMap(
        it->first,
        reinterpret_cast<const char *>(it->second.contents.start),
        it->second.contents.Size());
  }

  // Find the __debug_info section.
  dwarf2reader::SectionMap::const_iterator debug_info_entry =
      file_context.section_map().find("__debug_info");
  // There had better be a __debug_info section! Check at run time rather
  // than with assert, so that builds with assertions disabled never
  // dereference the end iterator.
  if (debug_info_entry == file_context.section_map().end() ||
      !debug_info_entry->second.first) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return false;
  }
  const std::pair<const char*, uint64>& debug_info_section =
      debug_info_entry->second;

  // Build a line-to-module loader for the root handler to use.
  DumperLineToModule line_to_module(&byte_reader);

  // Walk the __debug_info section, one compilation unit at a time.
  uint64 debug_info_length = debug_info_section.second;
  for (uint64 offset = 0; offset < debug_info_length;) {
    // Make a handler for the root DIE that populates MODULE with the
    // debug info.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
                                              offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
    // Make a Dwarf2Handler that drives our DIEHandler.
    dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
    // Make a DWARF parser for the compilation unit at OFFSET.
    dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map(),
                                               offset,
                                               &byte_reader,
                                               &die_dispatcher);
    // Process the entire compilation unit; get the offset of the next.
    offset += dwarf_reader.Start();
  }

  return true;
}

// Parse the call frame information in SECTION and add it to MODULE.
// EH_FRAME is passed through to dwarf2reader::CallFrameInfo; callers in
// this file pass true for __eh_frame and false for __debug_frame. Returns
// false, after printing a message to stderr, if there is no register name
// table for MACHO_READER's cpu type; returns true otherwise, regardless of
// what the parser encountered.
bool DumpSymbols::ReadCFI(google_breakpad::Module *module,
                          const mach_o::Reader &macho_reader,
                          const mach_o::Section &section,
                          bool eh_frame) const {
  // Find the appropriate set of register names for this file's
  // architecture.
  vector<string> register_names;
  switch (macho_reader.cpu_type()) {
    case CPU_TYPE_X86:
      register_names = DwarfCFIToModule::RegisterNames::I386();
      break;
    case CPU_TYPE_X86_64:
      register_names = DwarfCFIToModule::RegisterNames::X86_64();
      break;
    case CPU_TYPE_ARM:
      register_names = DwarfCFIToModule::RegisterNames::ARM();
      break;
    default: {
      const NXArchInfo *arch = google_breakpad::BreakpadGetArchInfoFromCpuType(
          macho_reader.cpu_type(), macho_reader.cpu_subtype());
      fprintf(stderr, "%s: cannot convert DWARF call frame information for ",
              selected_object_name_.c_str());
      if (arch)
        fprintf(stderr, "architecture '%s'", arch->name);
      else
        fprintf(stderr, "architecture %d,%d",
                macho_reader.cpu_type(), macho_reader.cpu_subtype());
      fprintf(stderr, " to Breakpad symbol file: no register name table\n");
      return false;
    }
  }

  // Find the call frame information and its size.
  const char *cfi = reinterpret_cast<const char *>(section.contents.start);
  size_t cfi_size = section.contents.Size();

  // Plug together the parser, handler, and their entourages.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule handler(module, register_names, &module_reporter);
  dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ?
                                       dwarf2reader::ENDIANNESS_BIG :
                                       dwarf2reader::ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
  // At the moment, according to folks at Apple and some cursory
  // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
  // this is the only base address the CFI parser will need.
  byte_reader.SetCFIDataBase(section.address, cfi);

  dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                                       section.section_name);
  dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
                                     &byte_reader, &handler, &dwarf_reporter,
                                     eh_frame);
  parser.Start();
  return true;
}

// A LoadCommandHandler that loads whatever debugging data it finds into a
// Module.
class DumpSymbols::LoadCommandDumper:
      public mach_o::Reader::LoadCommandHandler {
 public:
  // Create a load command dumper handling load commands from READER's
  // file, and adding data to MODULE. SYMBOL_DATA selects which kinds of
  // data are read, and HANDLE_INTER_CU_REFS is forwarded to ReadDwarf.
  LoadCommandDumper(const DumpSymbols &dumper,
                    google_breakpad::Module *module,
                    const mach_o::Reader &reader,
                    SymbolData symbol_data,
                    bool handle_inter_cu_refs)
      : dumper_(dumper),
        module_(module),
        reader_(reader),
        symbol_data_(symbol_data),
        handle_inter_cu_refs_(handle_inter_cu_refs) { }

  bool SegmentCommand(const mach_o::Segment &segment);
  bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings);

 private:
  const DumpSymbols &dumper_;
  google_breakpad::Module *module_;  // WEAK
  const mach_o::Reader &reader_;
  const SymbolData symbol_data_;
  const bool handle_inter_cu_refs_;
};

// Handle one segment load command. For __TEXT, set the module's load
// address and, unless CFI is disabled, read __eh_frame. For __DWARF, read
// the DWARF debugging information (unless only CFI was requested) and,
// unless CFI is disabled, read __debug_frame. Returns false if the
// segment's sections cannot be mapped or if ReadDwarf fails; CFI failures
// are deliberately not treated as fatal.
bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) {
  mach_o::SectionMap section_map;
  if (!reader_.MapSegmentSections(segment, &section_map))
    return false;

  if (segment.name == "__TEXT") {
    module_->SetLoadAddress(segment.vmaddr);
    if (symbol_data_ != NO_CFI) {
      mach_o::SectionMap::const_iterator eh_frame =
          section_map.find("__eh_frame");
      if (eh_frame != section_map.end()) {
        // If there is a problem reading this, don't treat it as a fatal error.
        dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
      }
    }
    return true;
  }

  if (segment.name == "__DWARF") {
    if (symbol_data_ != ONLY_CFI) {
      if (!dumper_.ReadDwarf(module_, reader_, section_map,
                             handle_inter_cu_refs_)) {
        return false;
      }
    }
    if (symbol_data_ != NO_CFI) {
      mach_o::SectionMap::const_iterator debug_frame
          = section_map.find("__debug_frame");
      if (debug_frame != section_map.end()) {
        // If there is a problem reading this, don't treat it as a fatal error.
        dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
      }
    }
  }

  return true;
}

// Handle a symbol table load command: run a StabsReader over ENTRIES and
// STRINGS, feeding the results into the module. Returns false if the
// reader fails to process the table.
bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries,
                                                   const ByteBuffer &strings) {
  StabsToModule stabs_to_module(module_);
  // Mac OS X STABS are never "unitized", and the size of the 'value' field
  // matches the address size of the executable.
  StabsReader stabs_reader(entries.start, entries.Size(),
                           strings.start, strings.Size(),
                           reader_.big_endian(),
                           reader_.bits_64() ? 8 : 4,
                           true,
                           &stabs_to_module);
  if (!stabs_reader.Process())
    return false;
  stabs_to_module.Finalize();
  return true;
}

// Read the selected architecture's debugging data into a newly allocated
// Module. On success, store the Module in *OUT_MODULE, transferring
// ownership to the caller, and return true. On failure return false and
// leave *OUT_MODULE untouched. If no architecture has been selected, the
// sole architecture is used, or else the one matching the local machine.
bool DumpSymbols::ReadSymbolData(Module** out_module) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1)
      selected_object_file_ = &object_files_[0];
    else {
      // Look for an object file whose architecture matches our own. Treat
      // a failure to determine the local architecture the same as having
      // no matching object file.
      const NXArchInfo *local_arch = NXGetLocalArchInfo();
      if (!local_arch ||
          !SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                [object_filename_ fileSystemRepresentation]);
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const NXArchInfo *selected_arch_info =
      google_breakpad::BreakpadGetArchInfoFromCpuType(
          selected_object_file_->cputype, selected_object_file_->cpusubtype);
  // The lookup can fail (ReadCFI checks for the same thing); without a
  // name there is nothing to put in the MODULE record.
  if (!selected_arch_info) {
    fprintf(stderr, "%s: unrecognized architecture %d,%d\n",
            [object_filename_ fileSystemRepresentation],
            selected_object_file_->cputype,
            selected_object_file_->cpusubtype);
    return false;
  }

  const char *selected_arch_name = selected_arch_info->name;
  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = [object_filename_ UTF8String];
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    // This must be +=; a bare + builds a temporary and discards it, which
    // leaves the architecture name out of every message.
    selected_object_name_ += selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record.
  NSString *module_name = [object_filename_ lastPathComponent];

  // Choose an identifier string, to appear in the MODULE record.
  string identifier = Identifier();
  if (identifier.empty())
    return false;
  identifier += "0";

  // Create a module to hold the debugging information.
  scoped_ptr<Module> module(new Module([module_name UTF8String],
                                       "mac",
                                       selected_arch_name,
                                       identifier));

  // Parse the selected object file.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes])
                   + selected_object_file_->offset,
                   selected_object_file_->size,
                   selected_object_file_->cputype,
                   selected_object_file_->cpusubtype))
    return false;

  // Walk its load commands, and deal with whatever is there.
  LoadCommandDumper load_command_dumper(*this, module.get(), reader,
                                        symbol_data_, handle_inter_cu_refs_);
  if (!reader.WalkLoadCommands(&load_command_dumper))
    return false;

  *out_module = module.release();

  return true;
}

// Read the symbol data and write it to STREAM as a Breakpad symbol file.
// Returns the result of Module::Write, or false if the symbol data could
// not be read.
bool DumpSymbols::WriteSymbolFile(std::ostream &stream) {
  Module* module = NULL;

  if (ReadSymbolData(&module) && module) {
    bool res = module->Write(stream, symbol_data_);
    delete module;
    return res;
  }

  return false;
}

}  // namespace google_breakpad