// -*- mode: c++ -*-

// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// dump_syms.mm: Create a symbol file for use with minidumps

#include "common/mac/dump_syms.h"

#include <Foundation/Foundation.h>
#include <assert.h>
#include <mach-o/arch.h>
#include <mach-o/fat.h>
#include <stdio.h>
#include <string.h>

#include <string>
#include <vector>

#include "common/dwarf/bytereader-inl.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/dwarf_cfi_to_module.h"
#include "common/dwarf_cu_to_module.h"
#include "common/dwarf_line_to_module.h"
#include "common/mac/file_id.h"
#include "common/mac/macho_reader.h"
#include "common/module.h"
#include "common/stabs_reader.h"
#include "common/stabs_to_module.h"

#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
#endif  // CPU_TYPE_ARM

using dwarf2reader::ByteReader;
using google_breakpad::DwarfCUToModule;
using google_breakpad::DwarfLineToModule;
using google_breakpad::FileID;
using google_breakpad::mach_o::FatReader;
using google_breakpad::mach_o::Section;
using google_breakpad::mach_o::Segment;
using google_breakpad::Module;
using google_breakpad::StabsReader;
using google_breakpad::StabsToModule;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;

namespace google_breakpad {

// Load the object file named by FILENAME, which may be either an ordinary
// file or a bundle (e.g. a dSYM) containing a DWARF resource.
//
// On success, this records the input path (input_pathname_), the path of
// the file actually read (object_filename_), that file's bytes (contents_),
// and a private copy of its list of per-architecture object files
// (object_files_), then returns true. On failure it returns false; the
// failure paths handled directly here print a diagnostic to stderr.
//
// NOTE(review): the NSString/NSData members are retained here; releasing
// them is assumed to happen elsewhere (presumably the destructor, which is
// not visible in this file) --- confirm.
bool DumpSymbols::Read(NSString *filename) {
  if (![[NSFileManager defaultManager] fileExistsAtPath:filename]) {
    fprintf(stderr, "Object file does not exist: %s\n",
            [filename fileSystemRepresentation]);
    return false;
  }

  input_pathname_ = [filename retain];

  // Does this filename refer to a dSYM bundle?
  NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_];

  if (bundle) {
    // Filenames referring to bundles usually have names of the form
    // "<basename>.dSYM"; however, if the user has specified a wrapper
    // suffix (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build settings),
    // then the name may have the form "<basename>.<extension>.dSYM". In
    // either case, the resource name for the file containing the DWARF
    // info within the bundle is <basename>.
    //
    // Since there's no way to tell how much to strip off, remove one
    // extension at a time, and use the first one that
    // pathForResource:ofType:inDirectory likes.
    NSString *base_name = [input_pathname_ lastPathComponent];
    NSString *dwarf_resource;

    do {
      NSString *new_base_name = [base_name stringByDeletingPathExtension];

      // If stringByDeletingPathExtension returned the name unchanged, then
      // there's nothing more for us to strip off --- lose.
      if ([new_base_name isEqualToString:base_name]) {
        fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
                [input_pathname_ fileSystemRepresentation]);
        return false;
      }

      // Take the shortened result as our new base_name.
      base_name = new_base_name;

      // Try to find a DWARF resource in the bundle under the new base_name.
      dwarf_resource = [bundle pathForResource:base_name
                                        ofType:nil inDirectory:@"DWARF"];
    } while (!dwarf_resource);

    object_filename_ = [dwarf_resource retain];
  } else {
    object_filename_ = [input_pathname_ retain];
  }

  // Read the file's contents into memory.
  //
  // The documentation for dataWithContentsOfMappedFile says:
  //
  //     Because of file mapping restrictions, this method should only be
  //     used if the file is guaranteed to exist for the duration of the
  //     data object's existence. It is generally safer to use the
  //     dataWithContentsOfFile: method.
  //
  // I gather this means that OS X doesn't have (or at least, that method
  // doesn't use) a form of mapping like Linux's MAP_PRIVATE, where the
  // process appears to get its own copy of the data, and changes to the
  // file don't affect memory and vice versa).
  //
  // Start ERROR out as nil so that the failure path below never messages
  // an indeterminate pointer.
  NSError *error = nil;
  contents_ = [NSData dataWithContentsOfFile:object_filename_
                                     options:0
                                       error:&error];
  if (!contents_) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            [object_filename_ fileSystemRepresentation],
            [[error localizedDescription] UTF8String]);
    return false;
  }
  [contents_ retain];

  // Get the list of object files present in the file.
  FatReader::Reporter fat_reporter([object_filename_
                                    fileSystemRepresentation]);
  FatReader fat_reader(&fat_reporter);
  if (!fat_reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]),
                       [contents_ length])) {
    return false;
  }

  // Get our own copy of fat_reader's object file list.
  size_t object_files_count;
  const struct fat_arch *object_files =
    fat_reader.object_files(&object_files_count);
  if (object_files_count == 0) {
    fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
            [object_filename_ fileSystemRepresentation]);
    return false;
  }
  object_files_.resize(object_files_count);
  memcpy(&object_files_[0], object_files,
         sizeof(struct fat_arch) * object_files_count);

  return true;
}

// Select the object file in object_files_ that NXFindBestFatArch considers
// the best match for CPU_TYPE and CPU_SUBTYPE. Return true and record the
// selection in selected_object_file_ if a match is found; otherwise return
// false and leave the current selection untouched.
bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type,
                                  cpu_subtype_t cpu_subtype) {
  // Find the best match for the architecture the user requested.
  const struct fat_arch *best_match
    = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0],
                        static_cast<uint32_t>(object_files_.size()));
  if (!best_match) return false;

  // Record the selected object file.
  selected_object_file_ = best_match;
  return true;
}

// Select an architecture by name: look ARCH_NAME up with
// NXGetArchInfoFromName and, if it is known, defer to the
// (cpu_type, cpu_subtype) overload. Return false if the name is unknown or
// no object file matches it.
bool DumpSymbols::SetArchitecture(const std::string &arch_name) {
  bool arch_set = false;
  const NXArchInfo *arch_info = NXGetArchInfoFromName(arch_name.c_str());
  if (arch_info) {
    arch_set = SetArchitecture(arch_info->cputype, arch_info->cpusubtype);
  }
  return arch_set;
}

// Return the identifier FileID::MachoIdentifier computes for the selected
// architecture of object_filename_, formatted by
// FileID::ConvertIdentifierToString with every '-' removed. On failure,
// print a diagnostic to stderr and return the empty string.
//
// selected_object_file_ must already be set; it is dereferenced
// unconditionally.
string DumpSymbols::Identifier() {
  FileID file_id([object_filename_ fileSystemRepresentation]);
  unsigned char identifier_bytes[16];
  cpu_type_t cpu_type = selected_object_file_->cputype;
  if (!file_id.MachoIdentifier(cpu_type, identifier_bytes)) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            [object_filename_ fileSystemRepresentation]);
    return "";
  }

  char identifier_string[40];
  FileID::ConvertIdentifierToString(identifier_bytes, identifier_string,
                                    sizeof(identifier_string));

  // Strip the dashes. After an erase, the character that followed the dash
  // now sits at index i, so resuming the search at i skips nothing.
  string compacted(identifier_string);
  for (size_t i = compacted.find('-'); i != string::npos;
       i = compacted.find('-', i)) {
    compacted.erase(i, 1);
  }

  return compacted;
}

// A line-to-module loader that accepts line number info parsed by
// dwarf2reader::LineInfo and populates a Module and a line vector
// with the results.
class DumpSymbols::DumperLineToModule:
      public DwarfCUToModule::LineToModuleFunctor {
 public:
  // Create a line-to-module converter using BYTE_READER.
  DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
      : byte_reader_(byte_reader) { }

  // Parse the LENGTH bytes of line number program at PROGRAM, feeding the
  // results to a DwarfLineToModule handler built over MODULE and LINES.
  void operator()(const char *program, uint64 length,
                  Module *module, vector<Module::Line> *lines) {
    DwarfLineToModule handler(module, lines);
    dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
    parser.Start();
  }
 private:
  dwarf2reader::ByteReader *byte_reader_;  // WEAK
};

// Read the DWARF debugging information in DWARF_SECTIONS (the sections of
// a __DWARF segment read by MACHO_READER) and add it to MODULE. Return
// false, after printing a diagnostic, if there is no __debug_info section;
// otherwise walk every compilation unit and return true.
bool DumpSymbols::ReadDwarf(google_breakpad::Module *module,
                            const mach_o::Reader &macho_reader,
                            const mach_o::SectionMap &dwarf_sections) const {
  // Build a byte reader of the appropriate endianness.
  ByteReader byte_reader(macho_reader.big_endian()
                         ? dwarf2reader::ENDIANNESS_BIG
                         : dwarf2reader::ENDIANNESS_LITTLE);

  // Construct a context for this file.
  DwarfCUToModule::FileContext file_context(selected_object_name_,
                                            module);

  // Build a dwarf2reader::SectionMap from our mach_o::SectionMap.
  for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin();
       it != dwarf_sections.end(); ++it) {
    file_context.section_map[it->first] =
      make_pair(reinterpret_cast<const char *>(it->second.contents.start),
                it->second.contents.Size());
  }

  // Find the __debug_info section.
  std::pair<const char *, uint64> debug_info_section
      = file_context.section_map["__debug_info"];
  // There had better be a __debug_info section!
  if (!debug_info_section.first) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return false;
  }

  // Build a line-to-module loader for the root handler to use.
  DumperLineToModule line_to_module(&byte_reader);

  // Walk the __debug_info section, one compilation unit at a time.
  uint64 debug_info_length = debug_info_section.second;
  for (uint64 offset = 0; offset < debug_info_length;) {
    // Make a handler for the root DIE that populates MODULE with the
    // debug info.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
                                              offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
    // Make a Dwarf2Handler that drives our DIEHandler.
    dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
    // Make a DWARF parser for the compilation unit at OFFSET.
    dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map,
                                               offset,
                                               &byte_reader,
                                               &die_dispatcher);
    // Process the entire compilation unit; get the offset of the next.
    offset += dwarf_reader.Start();
  }

  return true;
}

// Read the call frame information in SECTION, which belongs to the file
// read by MACHO_READER, and add it to MODULE. EH_FRAME is passed straight
// through to dwarf2reader::CallFrameInfo; the callers in this file pass
// true for __eh_frame and false for __debug_frame.
//
// Return false, after printing a diagnostic, if there is no register name
// table for the file's CPU type. Otherwise run the parser and return true;
// the parser's own result is not inspected here.
bool DumpSymbols::ReadCFI(google_breakpad::Module *module,
                          const mach_o::Reader &macho_reader,
                          const mach_o::Section &section,
                          bool eh_frame) const {
  // Find the appropriate set of register names for this file's
  // architecture.
  vector<string> register_names;
  switch (macho_reader.cpu_type()) {
    case CPU_TYPE_X86:
      register_names = DwarfCFIToModule::RegisterNames::I386();
      break;
    case CPU_TYPE_X86_64:
      register_names = DwarfCFIToModule::RegisterNames::X86_64();
      break;
    case CPU_TYPE_ARM:
      register_names = DwarfCFIToModule::RegisterNames::ARM();
      break;
    default: {
      const NXArchInfo *arch =
          NXGetArchInfoFromCpuType(macho_reader.cpu_type(),
                                   macho_reader.cpu_subtype());
      fprintf(stderr, "%s: cannot convert DWARF call frame information for ",
              selected_object_name_.c_str());
      if (arch)
        fprintf(stderr, "architecture '%s'", arch->name);
      else
        fprintf(stderr, "architecture %d,%d",
                macho_reader.cpu_type(), macho_reader.cpu_subtype());
      fprintf(stderr, " to Breakpad symbol file: no register name table\n");
      return false;
    }
  }

  // Find the call frame information and its size.
  const char *cfi = reinterpret_cast<const char *>(section.contents.start);
  size_t cfi_size = section.contents.Size();

  // Plug together the parser, handler, and their entourages.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule handler(module, register_names, &module_reporter);
  dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ?
                                       dwarf2reader::ENDIANNESS_BIG :
                                       dwarf2reader::ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
  // At the moment, according to folks at Apple and some cursory
  // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
  // this is the only base address the CFI parser will need.
  byte_reader.SetCFIDataBase(section.address, cfi);

  dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                                       section.section_name);
  dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
                                     &byte_reader, &handler, &dwarf_reporter,
                                     eh_frame);
  parser.Start();
  return true;
}

// A LoadCommandHandler that loads whatever debugging data it finds into a
// Module.
class DumpSymbols::LoadCommandDumper:
      public mach_o::Reader::LoadCommandHandler {
 public:
  // Create a load command dumper handling load commands from READER's
  // file, and adding data to MODULE.
  LoadCommandDumper(const DumpSymbols &dumper,
                    google_breakpad::Module *module,
                    const mach_o::Reader &reader)
      : dumper_(dumper), module_(module), reader_(reader) { }

  bool SegmentCommand(const mach_o::Segment &segment);
  bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings);

 private:
  const DumpSymbols &dumper_;
  google_breakpad::Module *module_;  // WEAK
  const mach_o::Reader &reader_;
};

// Handle one segment load command:
//   - for __TEXT, set the module's load address from the segment's vmaddr
//     and read __eh_frame CFI if that section is present;
//   - for __DWARF, read the DWARF debug info and, if present, the
//     __debug_frame CFI;
//   - any other segment is accepted and ignored.
// Return false only if the segment's sections cannot be mapped or
// ReadDwarf fails.
bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) {
  mach_o::SectionMap section_map;
  if (!reader_.MapSegmentSections(segment, &section_map))
    return false;

  if (segment.name == "__TEXT") {
    module_->SetLoadAddress(segment.vmaddr);
    mach_o::SectionMap::const_iterator eh_frame =
        section_map.find("__eh_frame");
    if (eh_frame != section_map.end()) {
      // If there is a problem reading this, don't treat it as a fatal error.
      dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
    }
    return true;
  }

  if (segment.name == "__DWARF") {
    if (!dumper_.ReadDwarf(module_, reader_, section_map))
      return false;
    mach_o::SectionMap::const_iterator debug_frame
        = section_map.find("__debug_frame");
    if (debug_frame != section_map.end()) {
      // If there is a problem reading this, don't treat it as a fatal error.
      dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
    }
  }

  return true;
}

// Handle the symbol table load command: feed the STABS data in ENTRIES,
// with its string table STRINGS, through a StabsReader into the module.
// Return false if the reader fails to process the data; otherwise finalize
// the StabsToModule handler and return true.
bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries,
                                                   const ByteBuffer &strings) {
  StabsToModule stabs_to_module(module_);
  // Mac OS X STABS are never "unitized", and the size of the 'value' field
  // matches the address size of the executable.
  //
  // NOTE(review): the literal `true` passed after the value size below
  // looks at odds with "never unitized" above --- confirm against the
  // StabsReader constructor's declaration which flag that argument is.
  StabsReader stabs_reader(entries.start, entries.Size(),
                           strings.start, strings.Size(),
                           reader_.big_endian(),
                           reader_.bits_64() ? 8 : 4,
                           true,
                           &stabs_to_module);
  if (!stabs_reader.Process())
    return false;
  stabs_to_module.Finalize();
  return true;
}

// Write a symbol file for the selected architecture to STREAM.
//
// If no architecture has been selected with SetArchitecture, select the
// file's only architecture, or else the one matching the machine we are
// running on. Then build a Module from the object file's load commands
// and write it out. Return true on success; on failure return false, with
// a diagnostic on stderr for the failures detected directly here.
bool DumpSymbols::WriteSymbolFile(FILE *stream) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1)
      selected_object_file_ = &object_files_[0];
    else {
      // Look for an object file whose architecture matches our own.
      // NXGetLocalArchInfo can return NULL; treat that as "no match"
      // rather than dereferencing it.
      const NXArchInfo *local_arch = NXGetLocalArchInfo();
      if (!local_arch ||
          !SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                [object_filename_ fileSystemRepresentation]);
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const NXArchInfo *selected_arch_info
      = NXGetArchInfoFromCpuType(selected_object_file_->cputype,
                                 selected_object_file_->cpusubtype);
  // The lookup yields NULL for CPU types the system doesn't know; without
  // a name we cannot produce a MODULE record.
  if (!selected_arch_info) {
    fprintf(stderr, "%s: unable to determine the name of architecture"
            " %d,%d\n",
            [object_filename_ fileSystemRepresentation],
            selected_object_file_->cputype,
            selected_object_file_->cpusubtype);
    return false;
  }

  // The architecture reported as "i386" is written to the symbol file
  // as "x86".
  const char *selected_arch_name = selected_arch_info->name;
  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = [object_filename_ UTF8String];
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    selected_object_name_ += selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record.
  NSString *module_name = [object_filename_ lastPathComponent];

  // Choose an identifier string, to appear in the MODULE record.
  string identifier = Identifier();
  if (identifier.empty())
    return false;
  // NOTE(review): the trailing "0" is presumably the identifier's "age"
  // field --- confirm against the symbol file format documentation.
  identifier += "0";

  // Create a module to hold the debugging information.
  Module module([module_name UTF8String], "mac", selected_arch_name,
                identifier);

  // Parse the selected object file.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes])
                   + selected_object_file_->offset,
                   selected_object_file_->size,
                   selected_object_file_->cputype,
                   selected_object_file_->cpusubtype))
    return false;

  // Walk its load commands, and deal with whatever is there.
  LoadCommandDumper load_command_dumper(*this, &module, reader);
  if (!reader.WalkLoadCommands(&load_command_dumper))
    return false;

  return module.Write(stream);
}

}  // namespace google_breakpad