1 // Copyright (c) 2011 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31 
32 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
33 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
34 
35 #include "common/linux/dump_symbols.h"
36 
37 #include <assert.h>
38 #include <elf.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <link.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <sys/mman.h>
46 #include <sys/stat.h>
47 #include <unistd.h>
48 
49 #include <iostream>
50 #include <set>
51 #include <string>
52 #include <utility>
53 #include <vector>
54 
55 #include "common/dwarf/bytereader-inl.h"
56 #include "common/dwarf/dwarf2diehandler.h"
57 #include "common/dwarf_cfi_to_module.h"
58 #include "common/dwarf_cu_to_module.h"
59 #include "common/dwarf_line_to_module.h"
60 #include "common/linux/elfutils.h"
61 #include "common/linux/elfutils-inl.h"
62 #include "common/linux/elf_symbols_to_module.h"
63 #include "common/linux/file_id.h"
64 #include "common/module.h"
65 #include "common/scoped_ptr.h"
66 #ifndef NO_STABS_SUPPORT
67 #include "common/stabs_reader.h"
68 #include "common/stabs_to_module.h"
69 #endif
70 #include "common/using_std_string.h"
71 
72 // This namespace contains helper functions.
73 namespace {
74 
75 using google_breakpad::DumpOptions;
76 using google_breakpad::DwarfCFIToModule;
77 using google_breakpad::DwarfCUToModule;
78 using google_breakpad::DwarfLineToModule;
79 using google_breakpad::ElfClass;
80 using google_breakpad::ElfClass32;
81 using google_breakpad::ElfClass64;
82 using google_breakpad::FindElfSectionByName;
83 using google_breakpad::GetOffset;
84 using google_breakpad::IsValidElf;
85 using google_breakpad::Module;
86 #ifndef NO_STABS_SUPPORT
87 using google_breakpad::StabsToModule;
88 #endif
89 using google_breakpad::scoped_ptr;
90 
//
// FDWrapper
//
// Wrapper class to make sure opened file is closed.  The wrapper owns the
// descriptor handed to its constructor and close()s it when destroyed,
// unless release() was called first.
//
class FDWrapper {
 public:
  // Take ownership of FD.  A value of -1 means "owns nothing".
  explicit FDWrapper(int fd) :
    fd_(fd) {}

  // Close the owned descriptor, if there still is one.
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Return the owned descriptor without giving up ownership (-1 if none).
  int get() const {
    return fd_;
  }

  // Give up ownership: return the descriptor and leave the wrapper empty,
  // so the destructor will not close it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  // A copy would make two wrappers close the same descriptor, so copying
  // and assignment are declared private and intentionally left undefined.
  FDWrapper(const FDWrapper&);
  FDWrapper& operator=(const FDWrapper&);

  int fd_;
};
115 
//
// MmapWrapper
//
// Wrapper class to make sure mapped regions are unmapped.  A region
// registered with set() is munmap()ed when the wrapper is destroyed,
// unless release() was called in between.
//
class MmapWrapper {
 public:
  // Start out empty; every member gets a defined value so that no code
  // path can observe an indeterminate address or size.
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

  // Unmap the registered region, if any.
  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }

  // Register the MAPPED_SIZE bytes at MAPPED_ADDRESS for unmapping.  A
  // region registered earlier is simply forgotten, not unmapped.
  void set(void *mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }

  // Forget the registered region without unmapping it.  Must only be
  // called after set().
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  // A copy would unmap the same region twice, so copying and assignment
  // are declared private and intentionally left undefined.
  MmapWrapper(const MmapWrapper&);
  MmapWrapper& operator=(const MmapWrapper&);

  bool is_set_;   // True between set() and release().
  void *base_;    // Start of the registered region.
  size_t size_;   // Length of the registered region, in bytes.
};
147 
// Return the preferred loading address of the binary whose NHEADER program
// headers start at PROGRAM_HEADERS.
//
// The answer is the virtual address of the first PT_LOAD entry in table
// order, or 0 if the table has no PT_LOAD entry.  For non-PIC executables
// (e_type == ET_EXEC) that is the start of the first loadable segment,
// since ELF requires segments to be sorted by load address; for PIC
// executables and dynamic libraries (e_type == ET_DYN) it will normally
// be zero.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  const Phdr* cursor = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++cursor) {
    if (cursor->p_type == PT_LOAD)
      return cursor->p_vaddr;
  }
  return 0;
}
167 
168 #ifndef NO_STABS_SUPPORT
169 template<typename ElfClass>
LoadStabs(const typename ElfClass::Ehdr * elf_header,const typename ElfClass::Shdr * stab_section,const typename ElfClass::Shdr * stabstr_section,const bool big_endian,Module * module)170 bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
171                const typename ElfClass::Shdr* stab_section,
172                const typename ElfClass::Shdr* stabstr_section,
173                const bool big_endian,
174                Module* module) {
175   // A callback object to handle data from the STABS reader.
176   StabsToModule handler(module);
177   // Find the addresses of the STABS data, and create a STABS reader object.
178   // On Linux, STABS entries always have 32-bit values, regardless of the
179   // address size of the architecture whose code they're describing, and
180   // the strings are always "unitized".
181   const uint8_t* stabs =
182       GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset);
183   const uint8_t* stabstr =
184       GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset);
185   google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
186                                       stabstr, stabstr_section->sh_size,
187                                       big_endian, 4, true, &handler);
188   // Read the STABS data, and do post-processing.
189   if (!reader.Process())
190     return false;
191   handler.Finalize();
192   return true;
193 }
194 #endif  // NO_STABS_SUPPORT
195 
196 // A line-to-module loader that accepts line number info parsed by
197 // dwarf2reader::LineInfo and populates a Module and a line vector
198 // with the results.
199 class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
200  public:
201   // Create a line-to-module converter using BYTE_READER.
DumperLineToModule(dwarf2reader::ByteReader * byte_reader)202   explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
203       : byte_reader_(byte_reader) { }
StartCompilationUnit(const string & compilation_dir)204   void StartCompilationUnit(const string& compilation_dir) {
205     compilation_dir_ = compilation_dir;
206   }
ReadProgram(const char * program,uint64 length,Module * module,std::vector<Module::Line> * lines)207   void ReadProgram(const char *program, uint64 length,
208                    Module *module, std::vector<Module::Line> *lines) {
209     DwarfLineToModule handler(module, compilation_dir_, lines);
210     dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
211     parser.Start();
212   }
213  private:
214   string compilation_dir_;
215   dwarf2reader::ByteReader *byte_reader_;
216 };
217 
218 template<typename ElfClass>
LoadDwarf(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const bool big_endian,bool handle_inter_cu_refs,Module * module)219 bool LoadDwarf(const string& dwarf_filename,
220                const typename ElfClass::Ehdr* elf_header,
221                const bool big_endian,
222                bool handle_inter_cu_refs,
223                Module* module) {
224   typedef typename ElfClass::Shdr Shdr;
225 
226   const dwarf2reader::Endianness endianness = big_endian ?
227       dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
228   dwarf2reader::ByteReader byte_reader(endianness);
229 
230   // Construct a context for this file.
231   DwarfCUToModule::FileContext file_context(dwarf_filename,
232                                             module,
233                                             handle_inter_cu_refs);
234 
235   // Build a map of the ELF file's sections.
236   const Shdr* sections =
237       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
238   int num_sections = elf_header->e_shnum;
239   const Shdr* section_names = sections + elf_header->e_shstrndx;
240   for (int i = 0; i < num_sections; i++) {
241     const Shdr* section = &sections[i];
242     string name = GetOffset<ElfClass, char>(elf_header,
243                                             section_names->sh_offset) +
244                   section->sh_name;
245     const char* contents = GetOffset<ElfClass, char>(elf_header,
246                                                      section->sh_offset);
247     file_context.AddSectionToSectionMap(name, contents, section->sh_size);
248   }
249 
250   // Parse all the compilation units in the .debug_info section.
251   DumperLineToModule line_to_module(&byte_reader);
252   dwarf2reader::SectionMap::const_iterator debug_info_entry =
253       file_context.section_map().find(".debug_info");
254   assert(debug_info_entry != file_context.section_map().end());
255   const std::pair<const char*, uint64>& debug_info_section =
256       debug_info_entry->second;
257   // This should never have been called if the file doesn't have a
258   // .debug_info section.
259   assert(debug_info_section.first);
260   uint64 debug_info_length = debug_info_section.second;
261   for (uint64 offset = 0; offset < debug_info_length;) {
262     // Make a handler for the root DIE that populates MODULE with the
263     // data that was found.
264     DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
265     DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
266     // Make a Dwarf2Handler that drives the DIEHandler.
267     dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
268     // Make a DWARF parser for the compilation unit at OFFSET.
269     dwarf2reader::CompilationUnit reader(file_context.section_map(),
270                                          offset,
271                                          &byte_reader,
272                                          &die_dispatcher);
273     // Process the entire compilation unit; get the offset of the next.
274     offset += reader.Start();
275   }
276   return true;
277 }
278 
279 // Fill REGISTER_NAMES with the register names appropriate to the
280 // machine architecture given in HEADER, indexed by the register
281 // numbers used in DWARF call frame information. Return true on
282 // success, or false if HEADER's machine architecture is not
283 // supported.
284 template<typename ElfClass>
DwarfCFIRegisterNames(const typename ElfClass::Ehdr * elf_header,std::vector<string> * register_names)285 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
286                            std::vector<string>* register_names) {
287   switch (elf_header->e_machine) {
288     case EM_386:
289       *register_names = DwarfCFIToModule::RegisterNames::I386();
290       return true;
291     case EM_ARM:
292       *register_names = DwarfCFIToModule::RegisterNames::ARM();
293       return true;
294     case EM_MIPS:
295       *register_names = DwarfCFIToModule::RegisterNames::MIPS();
296       return true;
297     case EM_X86_64:
298       *register_names = DwarfCFIToModule::RegisterNames::X86_64();
299       return true;
300     default:
301       return false;
302   }
303 }
304 
305 template<typename ElfClass>
LoadDwarfCFI(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const char * section_name,const typename ElfClass::Shdr * section,const bool eh_frame,const typename ElfClass::Shdr * got_section,const typename ElfClass::Shdr * text_section,const bool big_endian,Module * module)306 bool LoadDwarfCFI(const string& dwarf_filename,
307                   const typename ElfClass::Ehdr* elf_header,
308                   const char* section_name,
309                   const typename ElfClass::Shdr* section,
310                   const bool eh_frame,
311                   const typename ElfClass::Shdr* got_section,
312                   const typename ElfClass::Shdr* text_section,
313                   const bool big_endian,
314                   Module* module) {
315   // Find the appropriate set of register names for this file's
316   // architecture.
317   std::vector<string> register_names;
318   if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &register_names)) {
319     fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
320             " cannot convert DWARF call frame information\n",
321             dwarf_filename.c_str(), elf_header->e_machine);
322     return false;
323   }
324 
325   const dwarf2reader::Endianness endianness = big_endian ?
326       dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
327 
328   // Find the call frame information and its size.
329   const char* cfi =
330       GetOffset<ElfClass, char>(elf_header, section->sh_offset);
331   size_t cfi_size = section->sh_size;
332 
333   // Plug together the parser, handler, and their entourages.
334   DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
335   DwarfCFIToModule handler(module, register_names, &module_reporter);
336   dwarf2reader::ByteReader byte_reader(endianness);
337 
338   byte_reader.SetAddressSize(ElfClass::kAddrSize);
339 
340   // Provide the base addresses for .eh_frame encoded pointers, if
341   // possible.
342   byte_reader.SetCFIDataBase(section->sh_addr, cfi);
343   if (got_section)
344     byte_reader.SetDataBase(got_section->sh_addr);
345   if (text_section)
346     byte_reader.SetTextBase(text_section->sh_addr);
347 
348   dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
349                                                        section_name);
350   dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
351                                      &byte_reader, &handler, &dwarf_reporter,
352                                      eh_frame);
353   parser.Start();
354   return true;
355 }
356 
LoadELF(const string & obj_file,MmapWrapper * map_wrapper,void ** elf_header)357 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
358              void** elf_header) {
359   int obj_fd = open(obj_file.c_str(), O_RDONLY);
360   if (obj_fd < 0) {
361     fprintf(stderr, "Failed to open ELF file '%s': %s\n",
362             obj_file.c_str(), strerror(errno));
363     return false;
364   }
365   FDWrapper obj_fd_wrapper(obj_fd);
366   struct stat st;
367   if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
368     fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
369             obj_file.c_str(), strerror(errno));
370     return false;
371   }
372   void *obj_base = mmap(NULL, st.st_size,
373                         PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
374   if (obj_base == MAP_FAILED) {
375     fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
376             obj_file.c_str(), strerror(errno));
377     return false;
378   }
379   map_wrapper->set(obj_base, st.st_size);
380   *elf_header = obj_base;
381   if (!IsValidElf(*elf_header)) {
382     fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
383     return false;
384   }
385   return true;
386 }
387 
// Decode the data encoding byte of ELF_HEADER.  For ELFDATA2LSB or
// ELFDATA2MSB, store false or true respectively in *BIG_ENDIAN and return
// true.  For any other value, report it on stderr and return false,
// leaving *BIG_ENDIAN untouched.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  const int encoding = elf_header->e_ident[EI_DATA];
  switch (encoding) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      fprintf(stderr, "bad data encoding in ELF header: %d\n", encoding);
      return false;
  }
}
405 
// Read the .gnu_debuglink section and get the debug file name.
//
// DEBUGLINK points at the DEBUGLINK_SIZE bytes of the section: a
// NUL-terminated file name, padding up to a multiple of 4 bytes, and a
// 4-byte CRC32.  The name is looked up in each directory of DEBUG_DIRS in
// order; the first path that can be opened for reading is returned.
// OBJ_FILE is only used in messages.
//
// If anything goes wrong (no terminator inside the section, a section
// size that does not match the name length, or no readable candidate), a
// message is printed to stderr and an empty string is returned.
template<typename ElfClass>
string ReadDebugLink(const char* debuglink,
                     size_t debuglink_size,
                     const string& obj_file,
                     const std::vector<string>& debug_dirs) {
  // The bytes come straight from the file, so do not trust them to hold a
  // terminator: search for one inside the section only.
  const void* terminator = memchr(debuglink, '\0', debuglink_size);
  if (terminator == NULL) {
    fprintf(stderr, "Unterminated file name in .gnu_debuglink section "
            "of '%s'\n", obj_file.c_str());
    return "";
  }
  size_t debuglink_len =
      static_cast<const char*>(terminator) - debuglink + 5;  // '\0' + CRC32.
  debuglink_len = 4 * ((debuglink_len + 3) / 4);  // Round up to 4 bytes.

  // Sanity check.
  if (debuglink_len != debuglink_size) {
    fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
            "%zx %zx\n", debuglink_len, debuglink_size);
    return "";
  }

  std::vector<string>::const_iterator it;
  for (it = debug_dirs.begin(); it != debug_dirs.end(); ++it) {
    const string debuglink_path = *it + "/" + debuglink;
    const int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
    if (debuglink_fd >= 0) {
      // The file was opened only to prove that it is readable.
      close(debuglink_fd);
      // TODO(thestig) check the CRC-32 at the end of the .gnu_debuglink
      // section.
      return debuglink_path;
    }
  }

  fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
          obj_file.c_str());
  for (it = debug_dirs.begin(); it != debug_dirs.end(); ++it) {
    fprintf(stderr, "  %s/%s\n", it->c_str(), debuglink);
  }
  return "";
}
453 
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
// The list of debug directories is kept by reference, so the vector passed
// to the constructor must outlive this object.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
    debug_dirs_(dbg_dirs),
    has_loading_addr_(false),
    loading_addr_() {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files.  Recording a
  // section a second time only prints a warning.
  void LoadedSection(const string &section) {
    // insert() reports whether the name was new, which saves a separate
    // lookup.
    if (!loaded_sections_.insert(section).second) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // The ELF file and linked debug file are expected to have the same preferred
  // loading address.  The first call records ADDR and FILENAME; every later
  // call checks its ADDR against the recorded one, complains on stderr and
  // asserts if they differ.
  void set_loading_addr(Addr addr, const string &filename) {
    if (!has_loading_addr_) {
      loading_addr_ = addr;
      loaded_file_ = filename;
      // Mark the address as recorded; without this every call would look
      // like the first one and the check below could never run.
      has_loading_addr_ = true;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::vector<string>& debug_dirs() const {
    return debug_dirs_;
  }

  string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(string file) {
    debuglink_file_ = file;
  }

 private:
  const std::vector<string>& debug_dirs_; // Directories in which to
                                          // search for the debug ELF file.

  string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  Addr loading_addr_;  // Saves the preferred loading address from the
                       // first call to LoadSymbols().

  string loaded_file_;  // Name of the file loaded from the first call to
                        // LoadSymbols().

  std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
                                      // between calls to LoadSymbols().
};
528 
529 template<typename ElfClass>
LoadSymbols(const string & obj_file,const bool big_endian,const typename ElfClass::Ehdr * elf_header,const bool read_gnu_debug_link,LoadSymbolsInfo<ElfClass> * info,const DumpOptions & options,Module * module)530 bool LoadSymbols(const string& obj_file,
531                  const bool big_endian,
532                  const typename ElfClass::Ehdr* elf_header,
533                  const bool read_gnu_debug_link,
534                  LoadSymbolsInfo<ElfClass>* info,
535                  const DumpOptions& options,
536                  Module* module) {
537   typedef typename ElfClass::Addr Addr;
538   typedef typename ElfClass::Phdr Phdr;
539   typedef typename ElfClass::Shdr Shdr;
540   typedef typename ElfClass::Word Word;
541 
542   Addr loading_addr = GetLoadingAddress<ElfClass>(
543       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
544       elf_header->e_phnum);
545   module->SetLoadAddress(loading_addr);
546   info->set_loading_addr(loading_addr, obj_file);
547 
548   Word debug_section_type =
549       elf_header->e_machine == EM_MIPS ? SHT_MIPS_DWARF : SHT_PROGBITS;
550   const Shdr* sections =
551       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
552   const Shdr* section_names = sections + elf_header->e_shstrndx;
553   const char* names =
554       GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
555   const char *names_end = names + section_names->sh_size;
556   bool found_debug_info_section = false;
557   bool found_usable_info = false;
558 
559   if (options.symbol_data != ONLY_CFI) {
560 #ifndef NO_STABS_SUPPORT
561     // Look for STABS debugging information, and load it if present.
562     const Shdr* stab_section =
563       FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS,
564                                      sections, names, names_end,
565                                      elf_header->e_shnum);
566     if (stab_section) {
567       const Shdr* stabstr_section = stab_section->sh_link + sections;
568       if (stabstr_section) {
569         found_debug_info_section = true;
570         found_usable_info = true;
571         info->LoadedSection(".stab");
572         if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section,
573                                  big_endian, module)) {
574           fprintf(stderr, "%s: \".stab\" section found, but failed to load"
575                   " STABS debugging information\n", obj_file.c_str());
576         }
577       }
578     }
579 #endif  // NO_STABS_SUPPORT
580 
581     // Look for DWARF debugging information, and load it if present.
582     const Shdr* dwarf_section =
583       FindElfSectionByName<ElfClass>(".debug_info", debug_section_type,
584                                      sections, names, names_end,
585                                      elf_header->e_shnum);
586     if (dwarf_section) {
587       found_debug_info_section = true;
588       found_usable_info = true;
589       info->LoadedSection(".debug_info");
590       if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian,
591                                options.handle_inter_cu_refs, module)) {
592         fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
593                 "DWARF debugging information\n", obj_file.c_str());
594       }
595     }
596   }
597 
598   if (options.symbol_data != NO_CFI) {
599     // Dwarf Call Frame Information (CFI) is actually independent from
600     // the other DWARF debugging information, and can be used alone.
601     const Shdr* dwarf_cfi_section =
602         FindElfSectionByName<ElfClass>(".debug_frame", debug_section_type,
603                                        sections, names, names_end,
604                                        elf_header->e_shnum);
605     if (dwarf_cfi_section) {
606       // Ignore the return value of this function; even without call frame
607       // information, the other debugging information could be perfectly
608       // useful.
609       info->LoadedSection(".debug_frame");
610       bool result =
611           LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
612                                  dwarf_cfi_section, false, 0, 0, big_endian,
613                                  module);
614       found_usable_info = found_usable_info || result;
615     }
616 
617     // Linux C++ exception handling information can also provide
618     // unwinding data.
619     const Shdr* eh_frame_section =
620         FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
621                                        sections, names, names_end,
622                                        elf_header->e_shnum);
623     if (eh_frame_section) {
624       // Pointers in .eh_frame data may be relative to the base addresses of
625       // certain sections. Provide those sections if present.
626       const Shdr* got_section =
627           FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
628                                          sections, names, names_end,
629                                          elf_header->e_shnum);
630       const Shdr* text_section =
631           FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
632                                          sections, names, names_end,
633                                          elf_header->e_shnum);
634       info->LoadedSection(".eh_frame");
635       // As above, ignore the return value of this function.
636       bool result =
637           LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
638                                  eh_frame_section, true,
639                                  got_section, text_section, big_endian, module);
640       found_usable_info = found_usable_info || result;
641     }
642   }
643 
644   if (!found_debug_info_section) {
645     fprintf(stderr, "%s: file contains no debugging information"
646             " (no \".stab\" or \".debug_info\" sections)\n",
647             obj_file.c_str());
648 
649     // Failed, but maybe there's a .gnu_debuglink section?
650     if (read_gnu_debug_link) {
651       const Shdr* gnu_debuglink_section
652           = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS,
653                                            sections, names,
654                                            names_end, elf_header->e_shnum);
655       if (gnu_debuglink_section) {
656         if (!info->debug_dirs().empty()) {
657           const char* debuglink_contents =
658               GetOffset<ElfClass, char>(elf_header,
659                                         gnu_debuglink_section->sh_offset);
660           string debuglink_file
661               = ReadDebugLink<ElfClass>(debuglink_contents,
662                                         gnu_debuglink_section->sh_size,
663                                         obj_file, info->debug_dirs());
664           info->set_debuglink_file(debuglink_file);
665         } else {
666           fprintf(stderr, ".gnu_debuglink section found in '%s', "
667                   "but no debug path specified.\n", obj_file.c_str());
668         }
669       } else {
670         fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
671                 obj_file.c_str());
672       }
673     } else {
674       if (options.symbol_data != ONLY_CFI) {
675         // The caller doesn't want to consult .gnu_debuglink.
676         // See if there are export symbols available.
677         const Shdr* dynsym_section =
678           FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM,
679                                          sections, names, names_end,
680                                          elf_header->e_shnum);
681         const Shdr* dynstr_section =
682           FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB,
683                                          sections, names, names_end,
684                                          elf_header->e_shnum);
685         if (dynsym_section && dynstr_section) {
686           info->LoadedSection(".dynsym");
687 
688           const uint8_t* dynsyms =
689               GetOffset<ElfClass, uint8_t>(elf_header,
690                                            dynsym_section->sh_offset);
691           const uint8_t* dynstrs =
692               GetOffset<ElfClass, uint8_t>(elf_header,
693                                            dynstr_section->sh_offset);
694           bool result =
695               ELFSymbolsToModule(dynsyms,
696                                  dynsym_section->sh_size,
697                                  dynstrs,
698                                  dynstr_section->sh_size,
699                                  big_endian,
700                                  ElfClass::kAddrSize,
701                                  module);
702           found_usable_info = found_usable_info || result;
703         }
704       }
705 
706       // Return true if some usable information was found, since
707       // the caller doesn't want to use .gnu_debuglink.
708       return found_usable_info;
709     }
710 
711     // No debug info was found, let the user try again with .gnu_debuglink
712     // if present.
713     return false;
714   }
715 
716   return true;
717 }
718 
// Map the machine type recorded in ELF_HEADER to the architecture name
// used in breakpad symbol files.  Returns NULL for a machine type that is
// not in the list below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;
  const Half machine = elf_header->e_machine;

  if (machine == EM_386)     return "x86";
  if (machine == EM_ARM)     return "arm";
  if (machine == EM_MIPS)    return "mips";
  if (machine == EM_PPC64)   return "ppc64";
  if (machine == EM_PPC)     return "ppc";
  if (machine == EM_S390)    return "s390";
  if (machine == EM_SPARC)   return "sparc";
  if (machine == EM_SPARCV9) return "sparcv9";
  if (machine == EM_X86_64)  return "x86_64";
  return NULL;
}
738 
739 // Format the Elf file identifier in IDENTIFIER as a UUID with the
740 // dashes removed.
FormatIdentifier(unsigned char identifier[16])741 string FormatIdentifier(unsigned char identifier[16]) {
742   char identifier_str[40];
743   google_breakpad::FileID::ConvertIdentifierToString(
744       identifier,
745       identifier_str,
746       sizeof(identifier_str));
747   string id_no_dash;
748   for (int i = 0; identifier_str[i] != '\0'; ++i)
749     if (identifier_str[i] != '-')
750       id_no_dash += identifier_str[i];
751   // Add an extra "0" by the end.  PDB files on Windows have an 'age'
752   // number appended to the end of the file identifier; this isn't
753   // really used or necessary on other platforms, but be consistent.
754   id_no_dash += '0';
755   return id_no_dash;
756 }
757 
// Return the non-directory portion of FILENAME as computed by basename():
// normally the portion after the last slash, or the whole filename if
// there are no slashes.
string BaseFileName(const string &filename) {
  // basename() takes a mutable char* and may modify it, so give it a
  // private NUL-terminated copy.  The vector owns that copy, so there is
  // no unchecked strdup() result to dereference and nothing to leak if
  // building the result string throws.
  const char* source = filename.c_str();
  std::vector<char> scratch(source, source + filename.size() + 1);

  // Copy the result out before the scratch buffer goes away; the returned
  // pointer may refer into that buffer.
  return string(basename(&scratch[0]));
}
767 
768 template<typename ElfClass>
ReadSymbolDataElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** out_module)769 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
770                              const string& obj_filename,
771                              const std::vector<string>& debug_dirs,
772                              const DumpOptions& options,
773                              Module** out_module) {
774   typedef typename ElfClass::Ehdr Ehdr;
775   typedef typename ElfClass::Shdr Shdr;
776 
777   *out_module = NULL;
778 
779   unsigned char identifier[16];
780   if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header,
781                                                                 identifier)) {
782     fprintf(stderr, "%s: unable to generate file identifier\n",
783             obj_filename.c_str());
784     return false;
785   }
786 
787   const char *architecture = ElfArchitecture<ElfClass>(elf_header);
788   if (!architecture) {
789     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
790             obj_filename.c_str(), elf_header->e_machine);
791     return false;
792   }
793 
794   // Figure out what endianness this file is.
795   bool big_endian;
796   if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
797     return false;
798 
799   string name = BaseFileName(obj_filename);
800   string os = "Linux";
801   string id = FormatIdentifier(identifier);
802 
803   LoadSymbolsInfo<ElfClass> info(debug_dirs);
804   scoped_ptr<Module> module(new Module(name, os, architecture, id));
805   if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
806                              !debug_dirs.empty(), &info,
807                              options, module.get())) {
808     const string debuglink_file = info.debuglink_file();
809     if (debuglink_file.empty())
810       return false;
811 
812     // Load debuglink ELF file.
813     fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
814     MmapWrapper debug_map_wrapper;
815     Ehdr* debug_elf_header = NULL;
816     if (!LoadELF(debuglink_file, &debug_map_wrapper,
817                  reinterpret_cast<void**>(&debug_elf_header)))
818       return false;
819     // Sanity checks to make sure everything matches up.
820     const char *debug_architecture =
821         ElfArchitecture<ElfClass>(debug_elf_header);
822     if (!debug_architecture) {
823       fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
824               debuglink_file.c_str(), debug_elf_header->e_machine);
825       return false;
826     }
827     if (strcmp(architecture, debug_architecture)) {
828       fprintf(stderr, "%s with ELF machine architecture %s does not match "
829               "%s with ELF architecture %s\n",
830               debuglink_file.c_str(), debug_architecture,
831               obj_filename.c_str(), architecture);
832       return false;
833     }
834 
835     bool debug_big_endian;
836     if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
837       return false;
838     if (debug_big_endian != big_endian) {
839       fprintf(stderr, "%s and %s does not match in endianness\n",
840               obj_filename.c_str(), debuglink_file.c_str());
841       return false;
842     }
843 
844     if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
845                                debug_elf_header, false, &info,
846                                options, module.get())) {
847       return false;
848     }
849   }
850 
851   *out_module = module.release();
852   return true;
853 }
854 
855 }  // namespace
856 
857 namespace google_breakpad {
858 
859 // Not explicitly exported, but not static so it can be used in unit tests.
ReadSymbolDataInternal(const uint8_t * obj_file,const string & obj_filename,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)860 bool ReadSymbolDataInternal(const uint8_t* obj_file,
861                             const string& obj_filename,
862                             const std::vector<string>& debug_dirs,
863                             const DumpOptions& options,
864                             Module** module) {
865   if (!IsValidElf(obj_file)) {
866     fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
867     return false;
868   }
869 
870   int elfclass = ElfClass(obj_file);
871   if (elfclass == ELFCLASS32) {
872     return ReadSymbolDataElfClass<ElfClass32>(
873         reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs,
874         options, module);
875   }
876   if (elfclass == ELFCLASS64) {
877     return ReadSymbolDataElfClass<ElfClass64>(
878         reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs,
879         options, module);
880   }
881 
882   return false;
883 }
884 
WriteSymbolFile(const string & obj_file,const std::vector<string> & debug_dirs,const DumpOptions & options,std::ostream & sym_stream)885 bool WriteSymbolFile(const string &obj_file,
886                      const std::vector<string>& debug_dirs,
887                      const DumpOptions& options,
888                      std::ostream &sym_stream) {
889   Module* module;
890   if (!ReadSymbolData(obj_file, debug_dirs, options, &module))
891     return false;
892 
893   bool result = module->Write(sym_stream, options.symbol_data);
894   delete module;
895   return result;
896 }
897 
ReadSymbolData(const string & obj_file,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)898 bool ReadSymbolData(const string& obj_file,
899                     const std::vector<string>& debug_dirs,
900                     const DumpOptions& options,
901                     Module** module) {
902   MmapWrapper map_wrapper;
903   void* elf_header = NULL;
904   if (!LoadELF(obj_file, &map_wrapper, &elf_header))
905     return false;
906 
907   return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
908                                 obj_file, debug_dirs, options, module);
909 }
910 
911 }  // namespace google_breakpad
912