1// -*- mode: c++ -*-
2
3// Copyright (c) 2010, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33
34// dump_syms.mm: Create a symbol file for use with minidumps
35
36#include "common/mac/dump_syms.h"
37
38#include <Foundation/Foundation.h>
39#include <mach-o/arch.h>
40#include <mach-o/fat.h>
41#include <stdio.h>
42
43#include <string>
44#include <vector>
45
46#include "common/dwarf/bytereader-inl.h"
47#include "common/dwarf/dwarf2reader.h"
48#include "common/dwarf_cfi_to_module.h"
49#include "common/dwarf_cu_to_module.h"
50#include "common/dwarf_line_to_module.h"
51#include "common/mac/file_id.h"
52#include "common/mac/macho_reader.h"
53#include "common/module.h"
54#include "common/stabs_reader.h"
55#include "common/stabs_to_module.h"
56
57#ifndef CPU_TYPE_ARM
58#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
59#endif //  CPU_TYPE_ARM
60
61using dwarf2reader::ByteReader;
62using google_breakpad::DwarfCUToModule;
63using google_breakpad::DwarfLineToModule;
64using google_breakpad::FileID;
65using google_breakpad::mach_o::FatReader;
66using google_breakpad::mach_o::Section;
67using google_breakpad::mach_o::Segment;
68using google_breakpad::Module;
69using google_breakpad::StabsReader;
70using google_breakpad::StabsToModule;
71using std::make_pair;
72using std::pair;
73using std::string;
74using std::vector;
75
76namespace google_breakpad {
77
// Load the object file named by FILENAME, which may be either a Mach-O
// (possibly fat) file or a dSYM bundle containing one. On success, retain
// the input path, the resolved object file path, and the file's contents,
// record the list of architectures present in object_files_, and return
// true. On failure, print a message to stderr and return false.
bool DumpSymbols::Read(NSString *filename) {
  NSFileManager *file_manager = [NSFileManager defaultManager];
  if (![file_manager fileExistsAtPath:filename]) {
    fprintf(stderr, "Object file does not exist: %s\n",
            [filename fileSystemRepresentation]);
    return false;
  }

  input_pathname_ = [filename retain];

  // If NSBundle accepts the path, treat it as a dSYM bundle and locate the
  // DWARF-bearing file inside it; otherwise the path is the object file.
  NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_];
  if (!bundle) {
    object_filename_ = [input_pathname_ retain];
  } else {
    // Bundle names usually look like "<basename>.dSYM", but when a wrapper
    // suffix is in use (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build
    // settings) they can look like "<basename>.<extension>.dSYM". Either
    // way the DWARF resource inside the bundle is named <basename>.
    //
    // We cannot know in advance how many extensions to drop, so peel them
    // off one at a time and stop at the first name that
    // pathForResource:ofType:inDirectory: can resolve.
    NSString *candidate = [input_pathname_ lastPathComponent];
    NSString *dwarf_resource = nil;

    while (!dwarf_resource) {
      NSString *shortened = [candidate stringByDeletingPathExtension];

      // No extension left to remove: every candidate has been tried.
      if ([shortened isEqualToString:candidate]) {
        fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
                [input_pathname_ fileSystemRepresentation]);
        return false;
      }

      candidate = shortened;
      dwarf_resource = [bundle pathForResource:candidate
                                        ofType:nil
                                   inDirectory:@"DWARF"];
    }

    object_filename_ = [dwarf_resource retain];
  }

  // Read the whole file into memory instead of mapping it. The
  // documentation for dataWithContentsOfMappedFile warns that it should
  // only be used when the file is guaranteed to exist for as long as the
  // data object does, and says dataWithContentsOfFile: is generally safer.
  // (Presumably the mapping is not a private copy in the way Linux's
  // MAP_PRIVATE is, so changes to the file could show through.)
  NSError *error;
  // Sending retain to nil yields nil, so a failed read still leaves
  // contents_ nil for the check below.
  contents_ = [[NSData dataWithContentsOfFile:object_filename_
                                      options:0
                                        error:&error] retain];
  if (!contents_) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            [object_filename_ fileSystemRepresentation],
            [[error localizedDescription] UTF8String]);
    return false;
  }

  // Have FatReader enumerate the object files contained in the data.
  FatReader::Reporter fat_reporter([object_filename_
                                    fileSystemRepresentation]);
  FatReader fat_reader(&fat_reporter);
  const uint8_t *file_bytes =
      reinterpret_cast<const uint8_t *>([contents_ bytes]);
  if (!fat_reader.Read(file_bytes, [contents_ length]))
    return false;

  // Keep our own copy of the reader's architecture list, since fat_reader
  // goes out of scope when this function returns.
  size_t arch_count;
  const struct fat_arch *archs = fat_reader.object_files(&arch_count);
  if (arch_count == 0) {
    fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
            [object_filename_ fileSystemRepresentation]);
    return false;
  }
  object_files_.assign(archs, archs + arch_count);

  return true;
}
177
// Select the object file in object_files_ that best matches CPU_TYPE and
// CPU_SUBTYPE, as judged by NXFindBestFatArch. Return true and record the
// choice in selected_object_file_ if a match exists; otherwise return
// false and leave the current selection untouched.
bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type,
                                  cpu_subtype_t cpu_subtype) {
  // With no object files loaded there is nothing to match against, and
  // taking &object_files_[0] of an empty vector is undefined behavior.
  if (object_files_.empty())
    return false;

  // Find the best match for the architecture the user requested.
  const struct fat_arch *best_match
    = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0],
                        static_cast<uint32_t>(object_files_.size()));
  if (!best_match) return false;

  // Record the selected object file.
  selected_object_file_ = best_match;
  return true;
}
190
// Select an architecture by name. ARCH_NAME is looked up with
// NXGetArchInfoFromName; if the name is unknown, or no object file matches
// the resulting CPU type and subtype, return false.
bool DumpSymbols::SetArchitecture(const std::string &arch_name) {
  const NXArchInfo *arch_info = NXGetArchInfoFromName(arch_name.c_str());
  if (!arch_info)
    return false;
  return SetArchitecture(arch_info->cputype, arch_info->cpusubtype);
}
199
// Return the identifier FileID computes for the selected architecture of
// the object file, formatted as a string with every '-' removed. Print a
// message to stderr and return the empty string if it cannot be computed.
// An architecture must already be selected: selected_object_file_ is
// dereferenced unconditionally.
string DumpSymbols::Identifier() {
  FileID file_id([object_filename_ fileSystemRepresentation]);
  unsigned char raw_identifier[16];
  cpu_type_t cpu_type = selected_object_file_->cputype;
  const bool have_identifier =
      file_id.MachoIdentifier(cpu_type, raw_identifier);
  if (!have_identifier) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            [object_filename_ fileSystemRepresentation]);
    return "";
  }

  char formatted[40];
  FileID::ConvertIdentifierToString(raw_identifier, formatted,
                                    sizeof(formatted));

  // Drop the dashes. After an erase the following character slides into
  // the same index, so the search resumes from that index.
  string compacted(formatted);
  string::size_type dash = compacted.find('-');
  while (dash != string::npos) {
    compacted.erase(dash, 1);
    dash = compacted.find('-', dash);
  }

  return compacted;
}
221
// Functor handed to DwarfCUToModule for loading line number data: it runs
// a dwarf2reader::LineInfo parser over a line number program and lets a
// DwarfLineToModule handler record the results in a Module and a vector
// of lines.
class DumpSymbols::DumperLineToModule:
      public DwarfCUToModule::LineToModuleFunctor {
 public:
  // BYTE_READER is used to decode the line number programs. It is not
  // owned by this object, so it must outlive it.
  DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
      : byte_reader_(byte_reader) {
  }

  // Parse the LENGTH bytes of line number program starting at PROGRAM,
  // passing MODULE and LINES to the handler that receives the results.
  void operator()(const char *program, uint64 length,
                  Module *module, vector<Module::Line> *lines) {
    DwarfLineToModule line_handler(module, lines);
    dwarf2reader::LineInfo line_parser(program, length, byte_reader_,
                                       &line_handler);
    line_parser.Start();
  }

 private:
  dwarf2reader::ByteReader *byte_reader_;  // WEAK
};
240
// Read the DWARF debugging information held in DWARF_SECTIONS, the
// sections of a __DWARF segment taken from MACHO_READER's file, and add it
// to MODULE. Return false, after printing a message to stderr, if there is
// no __debug_info section; otherwise return true.
bool DumpSymbols::ReadDwarf(google_breakpad::Module *module,
                            const mach_o::Reader &macho_reader,
                            const mach_o::SectionMap &dwarf_sections) const {
  // Choose the byte reader's endianness from the Mach-O reader.
  ByteReader byte_reader(macho_reader.big_endian()
                         ? dwarf2reader::ENDIANNESS_BIG
                         : dwarf2reader::ENDIANNESS_LITTLE);

  // Per-file state shared by all of this file's compilation units.
  DwarfCUToModule::FileContext file_context(selected_object_name_, module);

  // Translate each mach_o::SectionMap entry into the (start, size) pair
  // form stored in the file context's section map.
  for (mach_o::SectionMap::const_iterator section = dwarf_sections.begin();
       section != dwarf_sections.end(); ++section) {
    const char *section_start =
        reinterpret_cast<const char *>(section->second.contents.start);
    file_context.section_map[section->first] =
        make_pair(section_start, section->second.contents.Size());
  }

  // Without a __debug_info section there is nothing to walk. A missing
  // entry shows up here as a null start pointer.
  std::pair<const char *, uint64> debug_info
      = file_context.section_map["__debug_info"];
  if (!debug_info.first) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return false;
  }

  // Line number loader shared by every compilation unit's root handler.
  DumperLineToModule line_to_module(&byte_reader);

  // Visit the compilation units in __debug_info one after another.
  const uint64 debug_info_length = debug_info.second;
  uint64 offset = 0;
  while (offset < debug_info_length) {
    // The root DIE handler populates MODULE with the unit's debug info.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
                                              offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
    // The dispatcher is the Dwarf2Handler that drives the DIE handler.
    dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
    // The parser for the compilation unit located at OFFSET.
    dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map,
                                               offset,
                                               &byte_reader,
                                               &die_dispatcher);
    // Start() processes the whole unit; its return value is added to
    // OFFSET to reach the next one.
    offset += dwarf_reader.Start();
  }

  return true;
}
295
// Read the DWARF call frame information in SECTION, which belongs to
// MACHO_READER's file, and add it to MODULE. EH_FRAME is forwarded to the
// CFI parser to say whether SECTION is an __eh_frame section. Return false,
// after printing a message to stderr, if there is no register name table
// for the file's architecture; otherwise return true.
bool DumpSymbols::ReadCFI(google_breakpad::Module *module,
                          const mach_o::Reader &macho_reader,
                          const mach_o::Section &section,
                          bool eh_frame) const {
  // Pick the register name table matching this file's architecture.
  vector<string> register_names;
  switch (macho_reader.cpu_type()) {
    case CPU_TYPE_X86:
      register_names = DwarfCFIToModule::RegisterNames::I386();
      break;
    case CPU_TYPE_X86_64:
      register_names = DwarfCFIToModule::RegisterNames::X86_64();
      break;
    case CPU_TYPE_ARM:
      register_names = DwarfCFIToModule::RegisterNames::ARM();
      break;
    default: {
      // Unsupported architecture: name it in the diagnostic if the system
      // knows a name for it, otherwise fall back to the raw numbers.
      const NXArchInfo *arch_info =
          NXGetArchInfoFromCpuType(macho_reader.cpu_type(),
                                   macho_reader.cpu_subtype());
      fprintf(stderr, "%s: cannot convert DWARF call frame information for ",
              selected_object_name_.c_str());
      if (!arch_info) {
        fprintf(stderr, "architecture %d,%d",
                macho_reader.cpu_type(), macho_reader.cpu_subtype());
      } else {
        fprintf(stderr, "architecture '%s'", arch_info->name);
      }
      fprintf(stderr, " to Breakpad symbol file: no register name table\n");
      return false;
    }
  }

  // Locate the raw call frame information.
  const char *cfi_start =
      reinterpret_cast<const char *>(section.contents.start);
  size_t cfi_length = section.contents.Size();

  // Byte reader configured for the file's endianness and address size.
  dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ?
                                       dwarf2reader::ENDIANNESS_BIG :
                                       dwarf2reader::ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
  // According to folks at Apple and some cursory investigation, Mac OS X
  // currently uses only DW_EH_PE_pcrel-based pointers, so the CFI data
  // base is the only base address the parser needs.
  byte_reader.SetCFIDataBase(section.address, cfi_start);

  // The handler that turns parsed CFI into MODULE entries, plus its
  // reporter.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule handler(module, register_names, &module_reporter);

  // The parser itself, plus its reporter. Start()'s result is not
  // examined here.
  dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                                       section.section_name);
  dwarf2reader::CallFrameInfo parser(cfi_start, cfi_length,
                                     &byte_reader, &handler, &dwarf_reporter,
                                     eh_frame);
  parser.Start();
  return true;
}
354
// Load command handler that feeds the debugging data it finds in a Mach-O
// file's load commands into a Module.
class DumpSymbols::LoadCommandDumper:
      public mach_o::Reader::LoadCommandHandler {
 public:
  // DUMPER supplies the DWARF and CFI readers, MODULE receives the data,
  // and READER is the Mach-O reader for the file whose load commands are
  // being handled. None of the three is owned by this object.
  LoadCommandDumper(const DumpSymbols &dumper,
                    google_breakpad::Module *module,
                    const mach_o::Reader &reader)
      : dumper_(dumper),
        module_(module),
        reader_(reader) {
  }

  // Callbacks for segment and symbol table load commands (presumably
  // overriding LoadCommandHandler's --- confirm against macho_reader.h).
  bool SegmentCommand(const mach_o::Segment &segment);
  bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings);

 private:
  const DumpSymbols &dumper_;
  google_breakpad::Module *module_;  // WEAK
  const mach_o::Reader &reader_;
};
375
// Handle one segment. For __TEXT, record the load address and read any
// __eh_frame CFI. For __DWARF, read the debugging information and any
// __debug_frame CFI. Other segments are ignored. Return false only if the
// segment's sections cannot be mapped or the DWARF data cannot be read.
bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) {
  mach_o::SectionMap sections;
  if (!reader_.MapSegmentSections(segment, &sections))
    return false;

  if (segment.name == "__TEXT") {
    module_->SetLoadAddress(segment.vmaddr);
    mach_o::SectionMap::const_iterator eh_frame = sections.find("__eh_frame");
    // A failure to read the CFI is deliberately not treated as fatal, so
    // ReadCFI's result is ignored.
    if (eh_frame != sections.end())
      dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
  } else if (segment.name == "__DWARF") {
    if (!dumper_.ReadDwarf(module_, reader_, sections))
      return false;
    mach_o::SectionMap::const_iterator debug_frame =
        sections.find("__debug_frame");
    // As above, problems reading the CFI are not fatal.
    if (debug_frame != sections.end())
      dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
  }

  return true;
}
405
// Handle a symbol table: run a StabsReader over ENTRIES and STRINGS and
// let a StabsToModule handler add what it finds to the module. Return
// false if the reader fails to process the data.
bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries,
                                                   const ByteBuffer &strings) {
  StabsToModule handler(module_);

  // The size of a STABS entry's 'value' field matches the address size of
  // the executable.
  const int value_size = reader_.bits_64() ? 8 : 4;

  // NOTE(review): the original comment here says Mac OS X STABS are never
  // "unitized", yet the literal passed after VALUE_SIZE is true --- confirm
  // that argument's meaning against StabsReader's declaration.
  StabsReader stabs_reader(entries.start, entries.Size(),
                           strings.start, strings.Size(),
                           reader_.big_endian(),
                           value_size,
                           true,
                           &handler);
  if (!stabs_reader.Process())
    return false;

  handler.Finalize();
  return true;
}
422
// Write a Breakpad symbol file for the selected architecture of the object
// file to STREAM. If SetArchitecture has not been called, use the file's
// only architecture, or else the one matching the machine we are running
// on. Return true on success; on failure, return false, in most cases
// after printing a message to stderr.
bool DumpSymbols::WriteSymbolFile(FILE *stream) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1) {
      selected_object_file_ = &object_files_[0];
    } else {
      // Look for an object file whose architecture matches our own.
      // NXGetLocalArchInfo can return NULL; treat that like "no match"
      // rather than dereferencing it.
      const NXArchInfo *local_arch = NXGetLocalArchInfo();
      if (!local_arch ||
          !SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                [object_filename_ fileSystemRepresentation]);
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const NXArchInfo *selected_arch_info
      = NXGetArchInfoFromCpuType(selected_object_file_->cputype,
                                 selected_object_file_->cpusubtype);
  // The lookup returns NULL for CPU types the system does not know; without
  // a name we cannot build the MODULE record.
  if (!selected_arch_info) {
    fprintf(stderr, "%s: unrecognized architecture %d,%d\n",
            [object_filename_ fileSystemRepresentation],
            selected_object_file_->cputype,
            selected_object_file_->cpusubtype);
    return false;
  }

  // The system's "i386" is reported as "x86".
  const char *selected_arch_name = selected_arch_info->name;
  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = [object_filename_ UTF8String];
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    // This must be +=: a bare + builds a temporary and throws it away,
    // leaving the architecture name out of every diagnostic.
    selected_object_name_ += selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record.
  NSString *module_name = [object_filename_ lastPathComponent];

  // Choose an identifier string, to appear in the MODULE record: the
  // file's identifier with a trailing "0" appended. Identifier() has
  // already reported the problem if it returns an empty string.
  string identifier = Identifier();
  if (identifier.empty())
    return false;
  identifier += "0";

  // Create a module to hold the debugging information.
  Module module([module_name UTF8String], "mac", selected_arch_name,
                identifier);

  // Parse the selected object file, which sits at the selected
  // architecture's offset within the file contents.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes])
                   + selected_object_file_->offset,
                   selected_object_file_->size,
                   selected_object_file_->cputype,
                   selected_object_file_->cpusubtype))
    return false;

  // Walk its load commands, and deal with whatever is there.
  LoadCommandDumper load_command_dumper(*this, &module, reader);
  if (!reader.WalkLoadCommands(&load_command_dumper))
    return false;

  return module.Write(stream);
}
494
495}  // namespace google_breakpad
496