1// -*- mode: c++ -*-
2
3// Copyright (c) 2011, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33
34// dump_syms.mm: Create a symbol file for use with minidumps
35
36#include "common/mac/dump_syms.h"
37
38#include <Foundation/Foundation.h>
39#include <mach-o/arch.h>
40#include <mach-o/fat.h>
41#include <stdio.h>
42
43#include <ostream>
44#include <string>
45#include <vector>
46
47#include "common/dwarf/bytereader-inl.h"
48#include "common/dwarf/dwarf2reader.h"
49#include "common/dwarf_cfi_to_module.h"
50#include "common/dwarf_cu_to_module.h"
51#include "common/dwarf_line_to_module.h"
52#include "common/mac/file_id.h"
53#include "common/mac/arch_utilities.h"
54#include "common/mac/macho_reader.h"
55#include "common/module.h"
56#include "common/scoped_ptr.h"
57#include "common/stabs_reader.h"
58#include "common/stabs_to_module.h"
59#include "common/symbol_data.h"
60
61#ifndef CPU_TYPE_ARM
62#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
63#endif //  CPU_TYPE_ARM
64
65using dwarf2reader::ByteReader;
66using google_breakpad::DwarfCUToModule;
67using google_breakpad::DwarfLineToModule;
68using google_breakpad::FileID;
69using google_breakpad::mach_o::FatReader;
70using google_breakpad::mach_o::Section;
71using google_breakpad::mach_o::Segment;
72using google_breakpad::Module;
73using google_breakpad::StabsReader;
74using google_breakpad::StabsToModule;
75using google_breakpad::scoped_ptr;
76using std::make_pair;
77using std::pair;
78using std::string;
79using std::vector;
80
81namespace google_breakpad {
82
// Load the object file named by FILENAME into memory and record the list
// of architectures it contains. FILENAME may name either a Mach-O (or fat)
// file directly, or a bundle whose DWARF directory holds that file.
// Returns true on success; on failure, prints a message to stderr (or
// leaves that to the FatReader's reporter) and returns false.
bool DumpSymbols::Read(NSString *filename) {
  // Nothing to do if the path doesn't name an existing file.
  NSFileManager *file_manager = [NSFileManager defaultManager];
  if (![file_manager fileExistsAtPath:filename]) {
    fprintf(stderr, "Object file does not exist: %s\n",
            [filename fileSystemRepresentation]);
    return false;
  }

  input_pathname_ = [filename retain];

  // A non-nil bundle means the path refers to a bundle (e.g. a dSYM)
  // rather than to the object file itself.
  NSBundle *dsym_bundle = [NSBundle bundleWithPath:input_pathname_];

  if (!dsym_bundle) {
    // Plain file: the input path is the object file.
    object_filename_ = [input_pathname_ retain];
  } else {
    // Bundle names normally look like "<basename>.dSYM", but when the
    // WRAPPER_SUFFIX / WRAPPER_EXTENSION build settings are in play they
    // can look like "<basename>.<extension>.dSYM". Either way, the DWARF
    // resource inside the bundle is named <basename>.
    //
    // We can't know in advance how many extensions to drop, so peel them
    // off one by one and stop at the first name that
    // pathForResource:ofType:inDirectory: can resolve.
    NSString *candidate = [input_pathname_ lastPathComponent];
    NSString *dwarf_path = nil;

    while (!dwarf_path) {
      NSString *shortened = [candidate stringByDeletingPathExtension];

      // No extension left to remove: every candidate has been tried and
      // none of them matched, so give up.
      if ([shortened isEqualToString:candidate]) {
        fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
                [input_pathname_ fileSystemRepresentation]);
        return false;
      }

      // Adopt the shorter name and look it up in the DWARF directory.
      candidate = shortened;
      dwarf_path = [dsym_bundle pathForResource:candidate
                                         ofType:nil
                                    inDirectory:@"DWARF"];
    }

    object_filename_ = [dwarf_path retain];
  }

  // Pull the whole file into memory rather than mapping it.
  //
  // The documentation for dataWithContentsOfMappedFile says:
  //
  //     Because of file mapping restrictions, this method should only be
  //     used if the file is guaranteed to exist for the duration of the
  //     data object’s existence. It is generally safer to use the
  //     dataWithContentsOfFile: method.
  //
  // The apparent implication is that OS X lacks (or at least that method
  // doesn't use) a mapping mode like Linux's MAP_PRIVATE, in which the
  // process effectively gets its own copy of the data and changes to the
  // file don't affect memory (and vice versa).
  NSError *read_error;
  contents_ = [[NSData dataWithContentsOfFile:object_filename_
                                      options:0
                                        error:&read_error] retain];
  if (!contents_) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            [object_filename_ fileSystemRepresentation],
            [[read_error localizedDescription] UTF8String]);
    return false;
  }

  // Let FatReader enumerate the object files held in the buffer.
  FatReader::Reporter fat_reporter([object_filename_
                                    fileSystemRepresentation]);
  FatReader fat_reader(&fat_reporter);
  const uint8_t *raw_bytes =
      reinterpret_cast<const uint8_t *>([contents_ bytes]);
  if (!fat_reader.Read(raw_bytes, [contents_ length]))
    return false;

  // Copy fat_reader's architecture table into our own vector, since
  // fat_reader goes out of scope when this function returns.
  size_t arch_count;
  const struct fat_arch *archs = fat_reader.object_files(&arch_count);
  if (arch_count == 0) {
    fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
            [object_filename_ fileSystemRepresentation]);
    return false;
  }
  object_files_.resize(arch_count);
  memcpy(&object_files_[0], archs, sizeof(struct fat_arch) * arch_count);

  return true;
}
182
// Select the object file that best matches CPU_TYPE / CPU_SUBTYPE among
// those recorded in object_files_. Returns true and records the choice in
// selected_object_file_ if a match exists; otherwise returns false and
// leaves the current selection untouched.
bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type,
                                  cpu_subtype_t cpu_subtype) {
  // Let the system pick the closest architecture to the one requested.
  const uint32_t candidate_count =
      static_cast<uint32_t>(object_files_.size());
  const struct fat_arch *chosen =
      NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0],
                        candidate_count);

  // Remember the winner, if there was one.
  if (chosen)
    selected_object_file_ = chosen;
  return chosen != NULL;
}
195
// Select an architecture by name. ARCH_NAME is resolved with
// BreakpadGetArchInfoFromName; returns false if the name is not recognized
// or if the two-argument SetArchitecture overload finds no match.
bool DumpSymbols::SetArchitecture(const std::string &arch_name) {
  const NXArchInfo *info =
      google_breakpad::BreakpadGetArchInfoFromName(arch_name.c_str());
  if (!info)
    return false;
  return SetArchitecture(info->cputype, info->cpusubtype);
}
205
// Compute the identifier of the selected object file and return it as a
// string with every '-' removed. Returns the empty string (after printing
// a message to stderr) if the identifier cannot be computed.
string DumpSymbols::Identifier() {
  FileID file_id([object_filename_ fileSystemRepresentation]);

  // Ask FileID for the raw identifier of the selected architecture.
  unsigned char raw_id[16];
  if (!file_id.MachoIdentifier(selected_object_file_->cputype,
                               selected_object_file_->cpusubtype,
                               raw_id)) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            [object_filename_ fileSystemRepresentation]);
    return "";
  }

  // Render the bytes in FileID's textual form.
  char dashed[40];
  FileID::ConvertIdentifierToString(raw_id, dashed, sizeof(dashed));

  // Copy everything except the dashes into the result.
  string compacted;
  for (const char *cursor = dashed; *cursor != '\0'; ++cursor) {
    if (*cursor != '-')
      compacted += *cursor;
  }

  return compacted;
}
228
// Bridges DwarfCUToModule to the DWARF line-number reader: when asked to
// read a line program, it runs dwarf2reader::LineInfo over it and lets a
// DwarfLineToModule handler fill in the given Module and line vector.
class DumpSymbols::DumperLineToModule:
      public DwarfCUToModule::LineToModuleHandler {
 public:
  // Build a converter that decodes line programs with BYTE_READER. The
  // reader is borrowed, not owned.
  DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
      : byte_reader_(byte_reader) { }

  // Remember the compilation directory of the unit about to be processed;
  // it is handed to the line handler in ReadProgram.
  void StartCompilationUnit(const string& compilation_dir) {
    compilation_dir_ = compilation_dir;
  }

  // Parse the LENGTH-byte line program at PROGRAM, reporting its contents
  // to a DwarfLineToModule built from MODULE, the current compilation
  // directory, and LINES.
  void ReadProgram(const char *program, uint64 length,
                   Module *module, vector<Module::Line> *lines) {
    DwarfLineToModule line_handler(module, compilation_dir_, lines);
    dwarf2reader::LineInfo line_parser(program, length, byte_reader_,
                                       &line_handler);
    line_parser.Start();
  }

 private:
  // Directory recorded by the most recent StartCompilationUnit call.
  string compilation_dir_;
  // Byte reader supplied at construction.
  dwarf2reader::ByteReader *byte_reader_;  // WEAK
};
253
// Read the DWARF debugging information in DWARF_SECTIONS and add it to
// MODULE.
//
//   module               - receives the data found in each compilation unit.
//   macho_reader         - consulted for the file's endianness.
//   dwarf_sections       - the sections of the __DWARF segment; each one is
//                          registered with the DWARF file context.
//   handle_inter_cu_refs - forwarded unchanged to
//                          DwarfCUToModule::FileContext.
//
// Returns true once every compilation unit in __debug_info has been
// walked. Returns false, after printing a message to stderr, if there is
// no usable __debug_info section.
bool DumpSymbols::ReadDwarf(google_breakpad::Module *module,
                            const mach_o::Reader &macho_reader,
                            const mach_o::SectionMap &dwarf_sections,
                            bool handle_inter_cu_refs) const {
  // Build a byte reader of the appropriate endianness.
  ByteReader byte_reader(macho_reader.big_endian()
                         ? dwarf2reader::ENDIANNESS_BIG
                         : dwarf2reader::ENDIANNESS_LITTLE);

  // Construct a context for this file.
  DwarfCUToModule::FileContext file_context(selected_object_name_,
                                            module,
                                            handle_inter_cu_refs);

  // Build a dwarf2reader::SectionMap from our mach_o::SectionMap.
  for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin();
       it != dwarf_sections.end(); ++it) {
    file_context.AddSectionToSectionMap(
        it->first,
        reinterpret_cast<const char *>(it->second.contents.start),
        it->second.contents.Size());
  }

  // Find the __debug_info section. There had better be one! This has to be
  // a real runtime check rather than an assert: a file without the section
  // is an input problem, and with asserts compiled out we would otherwise
  // dereference the end() iterator below.
  dwarf2reader::SectionMap::const_iterator debug_info_entry =
      file_context.section_map().find("__debug_info");
  if (debug_info_entry == file_context.section_map().end() ||
      !debug_info_entry->second.first) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return false;
  }
  const std::pair<const char*, uint64>& debug_info_section =
      debug_info_entry->second;

  // Build a line-to-module loader for the root handler to use.
  DumperLineToModule line_to_module(&byte_reader);

  // Walk the __debug_info section, one compilation unit at a time.
  uint64 debug_info_length = debug_info_section.second;
  for (uint64 offset = 0; offset < debug_info_length;) {
    // Make a handler for the root DIE that populates MODULE with the
    // debug info.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
                                              offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
    // Make a Dwarf2Handler that drives our DIEHandler.
    dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
    // Make a DWARF parser for the compilation unit at OFFSET.
    dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map(),
                                               offset,
                                               &byte_reader,
                                               &die_dispatcher);
    // Process the entire compilation unit; get the offset of the next.
    offset += dwarf_reader.Start();
  }

  return true;
}
314
// Parse the call frame information held in SECTION and add it to MODULE.
// MACHO_READER supplies the CPU type, endianness and word size of the file.
// EH_FRAME is handed straight to the CallFrameInfo parser; callers in this
// file pass true for __eh_frame and false for __debug_frame. Returns false,
// with a message on stderr, if there is no register name table for the
// file's CPU type; returns true otherwise.
bool DumpSymbols::ReadCFI(google_breakpad::Module *module,
                          const mach_o::Reader &macho_reader,
                          const mach_o::Section &section,
                          bool eh_frame) const {
  // Choose the register name table that goes with this CPU type.
  vector<string> register_names;
  const cpu_type_t cpu_type = macho_reader.cpu_type();
  if (cpu_type == CPU_TYPE_X86) {
    register_names = DwarfCFIToModule::RegisterNames::I386();
  } else if (cpu_type == CPU_TYPE_X86_64) {
    register_names = DwarfCFIToModule::RegisterNames::X86_64();
  } else if (cpu_type == CPU_TYPE_ARM) {
    register_names = DwarfCFIToModule::RegisterNames::ARM();
  } else {
    // No table for this CPU: explain which architecture was the problem,
    // by name if we can find one and by number otherwise.
    const cpu_subtype_t cpu_subtype = macho_reader.cpu_subtype();
    const NXArchInfo *arch =
        google_breakpad::BreakpadGetArchInfoFromCpuType(cpu_type,
                                                        cpu_subtype);
    fprintf(stderr, "%s: cannot convert DWARF call frame information for ",
            selected_object_name_.c_str());
    if (arch) {
      fprintf(stderr, "architecture '%s'", arch->name);
    } else {
      fprintf(stderr, "architecture %d,%d", cpu_type, cpu_subtype);
    }
    fprintf(stderr, " to Breakpad symbol file: no register name table\n");
    return false;
  }

  // Locate the raw CFI bytes and note how many there are.
  const char *cfi_data =
      reinterpret_cast<const char *>(section.contents.start);
  size_t cfi_length = section.contents.Size();

  // Wire up the handler that feeds MODULE, along with its reporter.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule cfi_handler(module, register_names, &module_reporter);

  // Configure a byte reader for this file's endianness and address size.
  dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ?
                                       dwarf2reader::ENDIANNESS_BIG :
                                       dwarf2reader::ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
  // According to folks at Apple and some cursory investigation, Mac OS X
  // only uses DW_EH_PE_pcrel-based pointers at the moment, so the CFI data
  // base is the only base address the parser needs.
  byte_reader.SetCFIDataBase(section.address, cfi_data);

  // Run the parser over the section.
  dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                                       section.section_name);
  dwarf2reader::CallFrameInfo cfi_parser(cfi_data, cfi_length,
                                         &byte_reader, &cfi_handler,
                                         &dwarf_reporter, eh_frame);
  cfi_parser.Start();
  return true;
}
372
// Load command handler that funnels the debugging data found in a Mach-O
// file's segments and symbol table into a Module.
class DumpSymbols::LoadCommandDumper:
      public mach_o::Reader::LoadCommandHandler {
 public:
  // Create a dumper that processes load commands from READER's file on
  // behalf of DUMPER and stores what it finds in MODULE. SYMBOL_DATA
  // selects which kinds of data are read; HANDLE_INTER_CU_REFS is passed
  // through to DumpSymbols::ReadDwarf.
  LoadCommandDumper(const DumpSymbols &dumper,
                    google_breakpad::Module *module,
                    const mach_o::Reader &reader,
                    SymbolData symbol_data,
                    bool handle_inter_cu_refs)
      : dumper_(dumper),
        module_(module),
        reader_(reader),
        symbol_data_(symbol_data),
        handle_inter_cu_refs_(handle_inter_cu_refs) { }

  // Handle one segment load command.
  bool SegmentCommand(const mach_o::Segment &segment);

  // Handle the symbol table: ENTRIES and STRINGS are fed to a STABS reader.
  bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings);

 private:
  // The DumpSymbols instance whose ReadCFI / ReadDwarf do the parsing.
  const DumpSymbols &dumper_;
  // Destination for everything that gets read.
  google_breakpad::Module *module_;  // WEAK
  // Reader for the Mach-O file being walked.
  const mach_o::Reader &reader_;
  // Which kinds of symbol data to read.
  const SymbolData symbol_data_;
  // Passed through to DumpSymbols::ReadDwarf.
  const bool handle_inter_cu_refs_;
};
401
// Process one segment. For __TEXT, record the load address and (unless CFI
// is disabled) read __eh_frame. For __DWARF, read the DWARF data (unless
// only CFI was requested) and then __debug_frame (unless CFI is disabled).
// Other segments are ignored. Returns false only if the segment's sections
// cannot be mapped or ReadDwarf fails.
bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) {
  mach_o::SectionMap sections;
  if (!reader_.MapSegmentSections(segment, &sections))
    return false;

  const bool want_cfi = (symbol_data_ != NO_CFI);

  if (segment.name == "__TEXT") {
    module_->SetLoadAddress(segment.vmaddr);
    if (want_cfi) {
      mach_o::SectionMap::const_iterator eh_frame =
          sections.find("__eh_frame");
      // A failure to read the CFI here is deliberately not fatal, so the
      // result is ignored.
      if (eh_frame != sections.end())
        dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
    }
    return true;
  }

  // Anything that is neither __TEXT nor __DWARF holds nothing we read.
  if (!(segment.name == "__DWARF"))
    return true;

  if (symbol_data_ != ONLY_CFI &&
      !dumper_.ReadDwarf(module_, reader_, sections,
                         handle_inter_cu_refs_)) {
    return false;
  }

  if (want_cfi) {
    mach_o::SectionMap::const_iterator debug_frame =
        sections.find("__debug_frame");
    // As with __eh_frame, a CFI reading problem is not treated as fatal.
    if (debug_frame != sections.end())
      dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
  }

  return true;
}
439
// Feed the symbol table in ENTRIES, with its string table STRINGS, through
// a STABS reader whose handler populates the module. Returns false if the
// reader fails to process the data; otherwise finalizes the handler and
// returns true.
bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries,
                                                   const ByteBuffer &strings) {
  StabsToModule stabs_handler(module_);

  // The size of the 'value' field matches the address size of the
  // executable.
  const int value_size = reader_.bits_64() ? 8 : 4;

  // Mac OS X STABS are never "unitized".
  // NOTE(review): the literal `true` below is presumably StabsReader's
  // "unitized" argument, which would contradict the remark above — confirm
  // against StabsReader's declaration.
  StabsReader stabs_parser(entries.start, entries.Size(),
                           strings.start, strings.Size(),
                           reader_.big_endian(),
                           value_size,
                           true,
                           &stabs_handler);

  if (stabs_parser.Process()) {
    stabs_handler.Finalize();
    return true;
  }
  return false;
}
456
// Read the debugging information of the selected architecture into a newly
// allocated Module.
//
// If no architecture has been selected with SetArchitecture, the only
// architecture in the file is used, or else the one that best matches the
// machine we are running on.
//
// On success, stores the new Module in *OUT_MODULE (the caller takes
// ownership) and returns true. On failure, returns false and leaves
// *OUT_MODULE untouched; a message is printed to stderr for the failures
// detected directly in this function.
bool DumpSymbols::ReadSymbolData(Module** out_module) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1)
      selected_object_file_ = &object_files_[0];
    else {
      // Look for an object file whose architecture matches our own.
      // NXGetLocalArchInfo may return NULL; treat that like "no match"
      // rather than dereferencing it.
      const NXArchInfo *local_arch = NXGetLocalArchInfo();
      if (!local_arch ||
          !SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                [object_filename_ fileSystemRepresentation]);
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const NXArchInfo *selected_arch_info =
      google_breakpad::BreakpadGetArchInfoFromCpuType(
          selected_object_file_->cputype, selected_object_file_->cpusubtype);
  // The lookup can fail for an unknown architecture (ReadCFI checks the
  // same call), so don't dereference a null result.
  if (!selected_arch_info) {
    fprintf(stderr, "%s: unrecognized architecture %d,%d\n",
            [object_filename_ fileSystemRepresentation],
            selected_object_file_->cputype,
            selected_object_file_->cpusubtype);
    return false;
  }

  // Use "x86" in place of the system's "i386" name.
  const char *selected_arch_name = selected_arch_info->name;
  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = [object_filename_ UTF8String];
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    // Must be +=: a plain + builds a temporary and throws it away, leaving
    // the architecture name out of every diagnostic.
    selected_object_name_ += selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record.
  NSString *module_name = [object_filename_ lastPathComponent];

  // Choose an identifier string, to appear in the MODULE record.
  string identifier = Identifier();
  if (identifier.empty())
    return false;
  identifier += "0";

  // Create a module to hold the debugging information. The scoped_ptr
  // frees it on every early return below.
  scoped_ptr<Module> module(new Module([module_name UTF8String],
                                       "mac",
                                       selected_arch_name,
                                       identifier));

  // Parse the selected object file, which lives at the recorded offset
  // inside the in-memory copy of the (possibly fat) file.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes])
                   + selected_object_file_->offset,
                   selected_object_file_->size,
                   selected_object_file_->cputype,
                   selected_object_file_->cpusubtype))
    return false;

  // Walk its load commands, and deal with whatever is there.
  LoadCommandDumper load_command_dumper(*this, module.get(), reader,
                                        symbol_data_, handle_inter_cu_refs_);
  if (!reader.WalkLoadCommands(&load_command_dumper))
    return false;

  // Success: hand ownership of the module to the caller.
  *out_module = module.release();

  return true;
}
533
// Read the symbol data and write it to STREAM. Returns false if the data
// could not be read or no module was produced; otherwise returns whatever
// Module::Write reports. The module is deleted before returning.
bool DumpSymbols::WriteSymbolFile(std::ostream &stream) {
  Module *loaded = NULL;

  // Bail out if reading failed or yielded nothing to write.
  if (!ReadSymbolData(&loaded) || !loaded)
    return false;

  const bool written = loaded->Write(stream, symbol_data_);
  delete loaded;
  return written;
}
545
546}  // namespace google_breakpad
547