1 // Copyright (c) 2011 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31 
32 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
33 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
34 
35 #include "common/linux/dump_symbols.h"
36 
37 #include <assert.h>
38 #include <elf.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <limits.h>
42 #include <link.h>
43 #include <stdint.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <sys/mman.h>
48 #include <sys/stat.h>
49 #include <unistd.h>
50 
51 #include <iostream>
52 #include <set>
53 #include <string>
54 #include <utility>
55 #include <vector>
56 
57 #include "common/dwarf/bytereader-inl.h"
58 #include "common/dwarf/dwarf2diehandler.h"
59 #include "common/dwarf_cfi_to_module.h"
60 #include "common/dwarf_cu_to_module.h"
61 #include "common/dwarf_line_to_module.h"
62 #include "common/dwarf_range_list_handler.h"
63 #include "common/linux/crc32.h"
64 #include "common/linux/eintr_wrapper.h"
65 #include "common/linux/elfutils.h"
66 #include "common/linux/elfutils-inl.h"
67 #include "common/linux/elf_symbols_to_module.h"
68 #include "common/linux/file_id.h"
69 #include "common/memory_allocator.h"
70 #include "common/module.h"
71 #include "common/path_helper.h"
72 #include "common/scoped_ptr.h"
73 #ifndef NO_STABS_SUPPORT
74 #include "common/stabs_reader.h"
75 #include "common/stabs_to_module.h"
76 #endif
77 #include "common/using_std_string.h"
78 
79 // This namespace contains helper functions.
80 namespace {
81 
82 using google_breakpad::DumpOptions;
83 using google_breakpad::DwarfCFIToModule;
84 using google_breakpad::DwarfCUToModule;
85 using google_breakpad::DwarfLineToModule;
86 using google_breakpad::DwarfRangeListHandler;
87 using google_breakpad::ElfClass;
88 using google_breakpad::ElfClass32;
89 using google_breakpad::ElfClass64;
90 using google_breakpad::FileID;
91 using google_breakpad::FindElfSectionByName;
92 using google_breakpad::GetOffset;
93 using google_breakpad::IsValidElf;
94 using google_breakpad::kDefaultBuildIdSize;
95 using google_breakpad::Module;
96 using google_breakpad::PageAllocator;
97 #ifndef NO_STABS_SUPPORT
98 using google_breakpad::StabsToModule;
99 #endif
100 using google_breakpad::scoped_ptr;
101 using google_breakpad::wasteful_vector;
102 
103 // Define AARCH64 ELF architecture if host machine does not include this define.
104 #ifndef EM_AARCH64
105 #define EM_AARCH64      183
106 #endif
107 
//
// FDWrapper
//
// Owns a file descriptor and closes it when the wrapper is destroyed,
// unless ownership has been given up with release(). The value -1 marks
// "no descriptor held".
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) : fd_(fd) {}

  ~FDWrapper() {
    if (fd_ == -1)
      return;
    close(fd_);
  }

  // Return the wrapped descriptor; the wrapper keeps ownership.
  int get() {
    return fd_;
  }

  // Return the wrapped descriptor and stop owning it, so the destructor
  // will not close it.
  int release() {
    const int released = fd_;
    fd_ = -1;
    return released;
  }

 private:
  int fd_;
};
132 
//
// MmapWrapper
//
// Wrapper class to make sure mapped regions are unmapped. A region
// registered with set() is munmap()ed by the destructor unless release()
// was called first.
//
class MmapWrapper {
 public:
  // Start out empty. All members are initialized so the object is in a
  // well-defined state even before set() is called.
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }

  // Take ownership of the MAPPED_SIZE bytes mapped at MAPPED_ADDRESS.
  // A region registered by an earlier set() call is forgotten, not
  // unmapped.
  void set(void* mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }

  // Give up ownership without unmapping. Must only be called after set().
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  bool is_set_;   // True while a region registered with set() is owned.
  void* base_;    // Start of the owned region, or NULL.
  size_t size_;   // Length of the owned region in bytes.
};
164 
// Return the preferred loading address of the binary described by the
// NHEADER program headers at PROGRAM_HEADERS: the p_vaddr of the first
// PT_LOAD header, or 0 if there is none.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  // For non-PIC executables (e_type == ET_EXEC), the load address is
  // the start address of the first PT_LOAD segment.  (ELF requires
  // the segments to be sorted by load address.)  For PIC executables
  // and dynamic libraries (e_type == ET_DYN), this address will
  // normally be zero.
  int index = 0;
  while (index < nheader) {
    const Phdr* segment = &program_headers[index];
    if (segment->p_type == PT_LOAD)
      return segment->p_vaddr;
    ++index;
  }
  return 0;
}
184 
185 // Find the set of address ranges for all PT_LOAD segments.
186 template <typename ElfClass>
GetPtLoadSegmentRanges(const typename ElfClass::Phdr * program_headers,int nheader)187 vector<Module::Range> GetPtLoadSegmentRanges(
188     const typename ElfClass::Phdr* program_headers,
189     int nheader) {
190   typedef typename ElfClass::Phdr Phdr;
191   vector<Module::Range> ranges;
192 
193   for (int i = 0; i < nheader; ++i) {
194     const Phdr& header = program_headers[i];
195     if (header.p_type == PT_LOAD) {
196       ranges.push_back(Module::Range(header.p_vaddr, header.p_memsz));
197     }
198   }
199   return ranges;
200 }
201 
202 #ifndef NO_STABS_SUPPORT
203 template<typename ElfClass>
LoadStabs(const typename ElfClass::Ehdr * elf_header,const typename ElfClass::Shdr * stab_section,const typename ElfClass::Shdr * stabstr_section,const bool big_endian,Module * module)204 bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
205                const typename ElfClass::Shdr* stab_section,
206                const typename ElfClass::Shdr* stabstr_section,
207                const bool big_endian,
208                Module* module) {
209   // A callback object to handle data from the STABS reader.
210   StabsToModule handler(module);
211   // Find the addresses of the STABS data, and create a STABS reader object.
212   // On Linux, STABS entries always have 32-bit values, regardless of the
213   // address size of the architecture whose code they're describing, and
214   // the strings are always "unitized".
215   const uint8_t* stabs =
216       GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset);
217   const uint8_t* stabstr =
218       GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset);
219   google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
220                                       stabstr, stabstr_section->sh_size,
221                                       big_endian, 4, true, &handler);
222   // Read the STABS data, and do post-processing.
223   if (!reader.Process())
224     return false;
225   handler.Finalize();
226   return true;
227 }
228 #endif  // NO_STABS_SUPPORT
229 
230 // A range handler that accepts rangelist data parsed by
231 // dwarf2reader::RangeListReader and populates a range vector (typically
232 // owned by a function) with the results.
233 class DumperRangesHandler : public DwarfCUToModule::RangesHandler {
234  public:
DumperRangesHandler(const uint8_t * buffer,uint64 size,dwarf2reader::ByteReader * reader)235   DumperRangesHandler(const uint8_t *buffer, uint64 size,
236                       dwarf2reader::ByteReader* reader)
237       : buffer_(buffer), size_(size), reader_(reader) { }
238 
ReadRanges(uint64 offset,Module::Address base_address,vector<Module::Range> * ranges)239   bool ReadRanges(uint64 offset, Module::Address base_address,
240                   vector<Module::Range>* ranges) {
241     DwarfRangeListHandler handler(base_address, ranges);
242     dwarf2reader::RangeListReader rangelist_reader(buffer_, size_, reader_,
243                                                    &handler);
244 
245     return rangelist_reader.ReadRangeList(offset);
246   }
247 
248  private:
249   const uint8_t *buffer_;
250   uint64 size_;
251   dwarf2reader::ByteReader* reader_;
252 };
253 
254 // A line-to-module loader that accepts line number info parsed by
255 // dwarf2reader::LineInfo and populates a Module and a line vector
256 // with the results.
257 class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
258  public:
259   // Create a line-to-module converter using BYTE_READER.
DumperLineToModule(dwarf2reader::ByteReader * byte_reader)260   explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
261       : byte_reader_(byte_reader) { }
StartCompilationUnit(const string & compilation_dir)262   void StartCompilationUnit(const string& compilation_dir) {
263     compilation_dir_ = compilation_dir;
264   }
ReadProgram(const uint8_t * program,uint64 length,Module * module,std::vector<Module::Line> * lines)265   void ReadProgram(const uint8_t *program, uint64 length,
266                    Module* module, std::vector<Module::Line>* lines) {
267     DwarfLineToModule handler(module, compilation_dir_, lines);
268     dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
269     parser.Start();
270   }
271  private:
272   string compilation_dir_;
273   dwarf2reader::ByteReader *byte_reader_;
274 };
275 
276 template<typename ElfClass>
LoadDwarf(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const bool big_endian,bool handle_inter_cu_refs,Module * module)277 bool LoadDwarf(const string& dwarf_filename,
278                const typename ElfClass::Ehdr* elf_header,
279                const bool big_endian,
280                bool handle_inter_cu_refs,
281                Module* module) {
282   typedef typename ElfClass::Shdr Shdr;
283 
284   const dwarf2reader::Endianness endianness = big_endian ?
285       dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
286   dwarf2reader::ByteReader byte_reader(endianness);
287 
288   // Construct a context for this file.
289   DwarfCUToModule::FileContext file_context(dwarf_filename,
290                                             module,
291                                             handle_inter_cu_refs);
292 
293   // Build a map of the ELF file's sections.
294   const Shdr* sections =
295       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
296   int num_sections = elf_header->e_shnum;
297   const Shdr* section_names = sections + elf_header->e_shstrndx;
298   for (int i = 0; i < num_sections; i++) {
299     const Shdr* section = &sections[i];
300     string name = GetOffset<ElfClass, char>(elf_header,
301                                             section_names->sh_offset) +
302                   section->sh_name;
303     const uint8_t *contents = GetOffset<ElfClass, uint8_t>(elf_header,
304                                                            section->sh_offset);
305     file_context.AddSectionToSectionMap(name, contents, section->sh_size);
306   }
307 
308   // Optional .debug_ranges reader
309   scoped_ptr<DumperRangesHandler> ranges_handler;
310   dwarf2reader::SectionMap::const_iterator ranges_entry =
311       file_context.section_map().find(".debug_ranges");
312   if (ranges_entry != file_context.section_map().end()) {
313     const std::pair<const uint8_t *, uint64>& ranges_section =
314       ranges_entry->second;
315     ranges_handler.reset(
316       new DumperRangesHandler(ranges_section.first, ranges_section.second,
317                               &byte_reader));
318   }
319 
320   // Parse all the compilation units in the .debug_info section.
321   DumperLineToModule line_to_module(&byte_reader);
322   dwarf2reader::SectionMap::const_iterator debug_info_entry =
323       file_context.section_map().find(".debug_info");
324   assert(debug_info_entry != file_context.section_map().end());
325   const std::pair<const uint8_t *, uint64>& debug_info_section =
326       debug_info_entry->second;
327   // This should never have been called if the file doesn't have a
328   // .debug_info section.
329   assert(debug_info_section.first);
330   uint64 debug_info_length = debug_info_section.second;
331   for (uint64 offset = 0; offset < debug_info_length;) {
332     // Make a handler for the root DIE that populates MODULE with the
333     // data that was found.
334     DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
335     DwarfCUToModule root_handler(&file_context, &line_to_module,
336                                  ranges_handler.get(), &reporter);
337     // Make a Dwarf2Handler that drives the DIEHandler.
338     dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
339     // Make a DWARF parser for the compilation unit at OFFSET.
340     dwarf2reader::CompilationUnit reader(dwarf_filename,
341                                          file_context.section_map(),
342                                          offset,
343                                          &byte_reader,
344                                          &die_dispatcher);
345     // Process the entire compilation unit; get the offset of the next.
346     offset += reader.Start();
347   }
348   return true;
349 }
350 
351 // Fill REGISTER_NAMES with the register names appropriate to the
352 // machine architecture given in HEADER, indexed by the register
353 // numbers used in DWARF call frame information. Return true on
354 // success, or false if HEADER's machine architecture is not
355 // supported.
356 template<typename ElfClass>
DwarfCFIRegisterNames(const typename ElfClass::Ehdr * elf_header,std::vector<string> * register_names)357 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
358                            std::vector<string>* register_names) {
359   switch (elf_header->e_machine) {
360     case EM_386:
361       *register_names = DwarfCFIToModule::RegisterNames::I386();
362       return true;
363     case EM_ARM:
364       *register_names = DwarfCFIToModule::RegisterNames::ARM();
365       return true;
366     case EM_AARCH64:
367       *register_names = DwarfCFIToModule::RegisterNames::ARM64();
368       return true;
369     case EM_MIPS:
370       *register_names = DwarfCFIToModule::RegisterNames::MIPS();
371       return true;
372     case EM_X86_64:
373       *register_names = DwarfCFIToModule::RegisterNames::X86_64();
374       return true;
375     default:
376       return false;
377   }
378 }
379 
380 template<typename ElfClass>
LoadDwarfCFI(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const char * section_name,const typename ElfClass::Shdr * section,const bool eh_frame,const typename ElfClass::Shdr * got_section,const typename ElfClass::Shdr * text_section,const bool big_endian,Module * module)381 bool LoadDwarfCFI(const string& dwarf_filename,
382                   const typename ElfClass::Ehdr* elf_header,
383                   const char* section_name,
384                   const typename ElfClass::Shdr* section,
385                   const bool eh_frame,
386                   const typename ElfClass::Shdr* got_section,
387                   const typename ElfClass::Shdr* text_section,
388                   const bool big_endian,
389                   Module* module) {
390   // Find the appropriate set of register names for this file's
391   // architecture.
392   std::vector<string> register_names;
393   if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &register_names)) {
394     fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
395             " cannot convert DWARF call frame information\n",
396             dwarf_filename.c_str(), elf_header->e_machine);
397     return false;
398   }
399 
400   const dwarf2reader::Endianness endianness = big_endian ?
401       dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
402 
403   // Find the call frame information and its size.
404   const uint8_t *cfi =
405       GetOffset<ElfClass, uint8_t>(elf_header, section->sh_offset);
406   size_t cfi_size = section->sh_size;
407 
408   // Plug together the parser, handler, and their entourages.
409   DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
410   DwarfCFIToModule handler(module, register_names, &module_reporter);
411   dwarf2reader::ByteReader byte_reader(endianness);
412 
413   byte_reader.SetAddressSize(ElfClass::kAddrSize);
414 
415   // Provide the base addresses for .eh_frame encoded pointers, if
416   // possible.
417   byte_reader.SetCFIDataBase(section->sh_addr, cfi);
418   if (got_section)
419     byte_reader.SetDataBase(got_section->sh_addr);
420   if (text_section)
421     byte_reader.SetTextBase(text_section->sh_addr);
422 
423   dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
424                                                        section_name);
425   dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
426                                      &byte_reader, &handler, &dwarf_reporter,
427                                      eh_frame);
428   parser.Start();
429   return true;
430 }
431 
LoadELF(const string & obj_file,MmapWrapper * map_wrapper,void ** elf_header)432 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
433              void** elf_header) {
434   int obj_fd = open(obj_file.c_str(), O_RDONLY);
435   if (obj_fd < 0) {
436     fprintf(stderr, "Failed to open ELF file '%s': %s\n",
437             obj_file.c_str(), strerror(errno));
438     return false;
439   }
440   FDWrapper obj_fd_wrapper(obj_fd);
441   struct stat st;
442   if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
443     fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
444             obj_file.c_str(), strerror(errno));
445     return false;
446   }
447   void* obj_base = mmap(NULL, st.st_size,
448                         PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
449   if (obj_base == MAP_FAILED) {
450     fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
451             obj_file.c_str(), strerror(errno));
452     return false;
453   }
454   map_wrapper->set(obj_base, st.st_size);
455   *elf_header = obj_base;
456   if (!IsValidElf(*elf_header)) {
457     fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
458     return false;
459   }
460   return true;
461 }
462 
// Determine the byte order recorded in ELF_HEADER's e_ident[EI_DATA].
// On success, store true in *BIG_ENDIAN for ELFDATA2MSB or false for
// ELFDATA2LSB and return true. For any other encoding, report it on
// stderr, leave *BIG_ENDIAN untouched and return false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  const int encoding = elf_header->e_ident[EI_DATA];
  switch (encoding) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n", encoding);
  return false;
}
480 
// Resolve |right_path| to an absolute path with realpath() and report
// whether it is identical to |left_abspath|, which must already be
// absolute. Returns false if |right_path| cannot be resolved.
bool IsSameFile(const char* left_abspath, const string& right_path) {
  char resolved_right[PATH_MAX];
  const char* resolved = realpath(right_path.c_str(), resolved_right);
  return resolved != NULL && strcmp(left_abspath, resolved_right) == 0;
}
489 
490 // Read the .gnu_debuglink and get the debug file name. If anything goes
491 // wrong, return an empty string.
ReadDebugLink(const uint8_t * debuglink,const size_t debuglink_size,const bool big_endian,const string & obj_file,const std::vector<string> & debug_dirs)492 string ReadDebugLink(const uint8_t *debuglink,
493                      const size_t debuglink_size,
494                      const bool big_endian,
495                      const string& obj_file,
496                      const std::vector<string>& debug_dirs) {
497   // Include '\0' + CRC32 (4 bytes).
498   size_t debuglink_len = strlen(reinterpret_cast<const char *>(debuglink)) + 5;
499   debuglink_len = 4 * ((debuglink_len + 3) / 4);  // Round up to 4 bytes.
500 
501   // Sanity check.
502   if (debuglink_len != debuglink_size) {
503     fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
504             "%zx %zx\n", debuglink_len, debuglink_size);
505     return string();
506   }
507 
508   char obj_file_abspath[PATH_MAX];
509   if (!realpath(obj_file.c_str(), obj_file_abspath)) {
510     fprintf(stderr, "Cannot resolve absolute path for %s\n", obj_file.c_str());
511     return string();
512   }
513 
514   std::vector<string> searched_paths;
515   string debuglink_path;
516   std::vector<string>::const_iterator it;
517   for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) {
518     const string& debug_dir = *it;
519     debuglink_path = debug_dir + "/" +
520                      reinterpret_cast<const char *>(debuglink);
521 
522     // There is the annoying case of /path/to/foo.so having foo.so as the
523     // debug link file name. Thus this may end up opening /path/to/foo.so again,
524     // and there is a small chance of the two files having the same CRC.
525     if (IsSameFile(obj_file_abspath, debuglink_path))
526       continue;
527 
528     searched_paths.push_back(debug_dir);
529     int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
530     if (debuglink_fd < 0)
531       continue;
532 
533     FDWrapper debuglink_fd_wrapper(debuglink_fd);
534 
535     // The CRC is the last 4 bytes in |debuglink|.
536     const dwarf2reader::Endianness endianness = big_endian ?
537         dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
538     dwarf2reader::ByteReader byte_reader(endianness);
539     uint32_t expected_crc =
540         byte_reader.ReadFourBytes(&debuglink[debuglink_size - 4]);
541 
542     uint32_t actual_crc = 0;
543     while (true) {
544       const size_t kReadSize = 4096;
545       char buf[kReadSize];
546       ssize_t bytes_read = HANDLE_EINTR(read(debuglink_fd, &buf, kReadSize));
547       if (bytes_read < 0) {
548         fprintf(stderr, "Error reading debug ELF file %s.\n",
549                 debuglink_path.c_str());
550         return string();
551       }
552       if (bytes_read == 0)
553         break;
554       actual_crc = google_breakpad::UpdateCrc32(actual_crc, buf, bytes_read);
555     }
556     if (actual_crc != expected_crc) {
557       fprintf(stderr, "Error reading debug ELF file - CRC32 mismatch: %s\n",
558               debuglink_path.c_str());
559       continue;
560     }
561 
562     // Found debug file.
563     return debuglink_path;
564   }
565 
566   // Not found case.
567   fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
568           obj_file.c_str());
569   for (it = searched_paths.begin(); it < searched_paths.end(); ++it) {
570     const string& debug_dir = *it;
571     fprintf(stderr, "  %s/%s\n", debug_dir.c_str(), debuglink);
572   }
573   return string();
574 }
575 
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // DBG_DIRS is stored by reference, so it must outlive this object.
  explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
    debug_dirs_(dbg_dirs),
    has_loading_addr_(false),
    loading_addr_(0) {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files. A repeated
  // section name is reported on stderr.
  void LoadedSection(const string& section) {
    if (!loaded_sections_.insert(section).second) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // The ELF file and linked debug file are expected to have the same preferred
  // loading address. The first call records ADDR and FILENAME; every later
  // call checks ADDR against the recorded address, and on a mismatch
  // reports both file names on stderr and asserts.
  void set_loading_addr(Addr addr, const string& filename) {
    if (!has_loading_addr_) {
      loading_addr_ = addr;
      loaded_file_ = filename;
      // Mark the address as recorded so that later calls are compared
      // against it instead of overwriting it.
      has_loading_addr_ = true;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::vector<string>& debug_dirs() const {
    return debug_dirs_;
  }

  string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(string file) {
    debuglink_file_ = file;
  }

 private:
  const std::vector<string>& debug_dirs_; // Directories in which to
                                          // search for the debug ELF file.

  string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  Addr loading_addr_;  // Saves the preferred loading address from the
                       // first call to LoadSymbols().

  string loaded_file_;  // Name of the file loaded from the first call to
                        // LoadSymbols().

  std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
                                      // between calls to LoadSymbols().
};
650 
651 template<typename ElfClass>
LoadSymbols(const string & obj_file,const bool big_endian,const typename ElfClass::Ehdr * elf_header,const bool read_gnu_debug_link,LoadSymbolsInfo<ElfClass> * info,const DumpOptions & options,Module * module)652 bool LoadSymbols(const string& obj_file,
653                  const bool big_endian,
654                  const typename ElfClass::Ehdr* elf_header,
655                  const bool read_gnu_debug_link,
656                  LoadSymbolsInfo<ElfClass>* info,
657                  const DumpOptions& options,
658                  Module* module) {
659   typedef typename ElfClass::Addr Addr;
660   typedef typename ElfClass::Phdr Phdr;
661   typedef typename ElfClass::Shdr Shdr;
662 
663   Addr loading_addr = GetLoadingAddress<ElfClass>(
664       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
665       elf_header->e_phnum);
666   module->SetLoadAddress(loading_addr);
667   info->set_loading_addr(loading_addr, obj_file);
668 
669   // Allow filtering of extraneous debug information in partitioned libraries.
670   // Such libraries contain debug information for all libraries extracted from
671   // the same combined library, implying extensive duplication.
672   vector<Module::Range> address_ranges = GetPtLoadSegmentRanges<ElfClass>(
673       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
674       elf_header->e_phnum);
675   module->SetAddressRanges(address_ranges);
676 
677   const Shdr* sections =
678       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
679   const Shdr* section_names = sections + elf_header->e_shstrndx;
680   const char* names =
681       GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
682   const char *names_end = names + section_names->sh_size;
683   bool found_debug_info_section = false;
684   bool found_usable_info = false;
685 
686   if (options.symbol_data != ONLY_CFI) {
687 #ifndef NO_STABS_SUPPORT
688     // Look for STABS debugging information, and load it if present.
689     const Shdr* stab_section =
690       FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS,
691                                      sections, names, names_end,
692                                      elf_header->e_shnum);
693     if (stab_section) {
694       const Shdr* stabstr_section = stab_section->sh_link + sections;
695       if (stabstr_section) {
696         found_debug_info_section = true;
697         found_usable_info = true;
698         info->LoadedSection(".stab");
699         if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section,
700                                  big_endian, module)) {
701           fprintf(stderr, "%s: \".stab\" section found, but failed to load"
702                   " STABS debugging information\n", obj_file.c_str());
703         }
704       }
705     }
706 #endif  // NO_STABS_SUPPORT
707 
708     // Look for DWARF debugging information, and load it if present.
709     const Shdr* dwarf_section =
710       FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS,
711                                      sections, names, names_end,
712                                      elf_header->e_shnum);
713 
714     // .debug_info section type is SHT_PROGBITS for mips on pnacl toolchains,
715     // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
716     if (elf_header->e_machine == EM_MIPS && !dwarf_section) {
717       dwarf_section =
718         FindElfSectionByName<ElfClass>(".debug_info", SHT_MIPS_DWARF,
719                                        sections, names, names_end,
720                                        elf_header->e_shnum);
721     }
722 
723     if (dwarf_section) {
724       found_debug_info_section = true;
725       found_usable_info = true;
726       info->LoadedSection(".debug_info");
727       if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian,
728                                options.handle_inter_cu_refs, module)) {
729         fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
730                 "DWARF debugging information\n", obj_file.c_str());
731       }
732     }
733 
734     // See if there are export symbols available.
735     const Shdr* symtab_section =
736         FindElfSectionByName<ElfClass>(".symtab", SHT_SYMTAB,
737                                        sections, names, names_end,
738                                        elf_header->e_shnum);
739     const Shdr* strtab_section =
740         FindElfSectionByName<ElfClass>(".strtab", SHT_STRTAB,
741                                        sections, names, names_end,
742                                        elf_header->e_shnum);
743     if (symtab_section && strtab_section) {
744       info->LoadedSection(".symtab");
745 
746       const uint8_t* symtab =
747           GetOffset<ElfClass, uint8_t>(elf_header,
748                                        symtab_section->sh_offset);
749       const uint8_t* strtab =
750           GetOffset<ElfClass, uint8_t>(elf_header,
751                                        strtab_section->sh_offset);
752       bool result =
753           ELFSymbolsToModule(symtab,
754                              symtab_section->sh_size,
755                              strtab,
756                              strtab_section->sh_size,
757                              big_endian,
758                              ElfClass::kAddrSize,
759                              module);
760       found_usable_info = found_usable_info || result;
761     } else {
762       // Look in dynsym only if full symbol table was not available.
763       const Shdr* dynsym_section =
764           FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM,
765                                          sections, names, names_end,
766                                          elf_header->e_shnum);
767       const Shdr* dynstr_section =
768           FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB,
769                                          sections, names, names_end,
770                                          elf_header->e_shnum);
771       if (dynsym_section && dynstr_section) {
772         info->LoadedSection(".dynsym");
773 
774         const uint8_t* dynsyms =
775             GetOffset<ElfClass, uint8_t>(elf_header,
776                                          dynsym_section->sh_offset);
777         const uint8_t* dynstrs =
778             GetOffset<ElfClass, uint8_t>(elf_header,
779                                          dynstr_section->sh_offset);
780         bool result =
781             ELFSymbolsToModule(dynsyms,
782                                dynsym_section->sh_size,
783                                dynstrs,
784                                dynstr_section->sh_size,
785                                big_endian,
786                                ElfClass::kAddrSize,
787                                module);
788         found_usable_info = found_usable_info || result;
789       }
790     }
791   }
792 
793   if (options.symbol_data != NO_CFI) {
794     // Dwarf Call Frame Information (CFI) is actually independent from
795     // the other DWARF debugging information, and can be used alone.
796     const Shdr* dwarf_cfi_section =
797         FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
798                                        sections, names, names_end,
799                                        elf_header->e_shnum);
800 
801     // .debug_frame section type is SHT_PROGBITS for mips on pnacl toolchains,
802     // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
803     if (elf_header->e_machine == EM_MIPS && !dwarf_cfi_section) {
804       dwarf_cfi_section =
805           FindElfSectionByName<ElfClass>(".debug_frame", SHT_MIPS_DWARF,
806                                         sections, names, names_end,
807                                         elf_header->e_shnum);
808     }
809 
810     if (dwarf_cfi_section) {
811       // Ignore the return value of this function; even without call frame
812       // information, the other debugging information could be perfectly
813       // useful.
814       info->LoadedSection(".debug_frame");
815       bool result =
816           LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
817                                  dwarf_cfi_section, false, 0, 0, big_endian,
818                                  module);
819       found_usable_info = found_usable_info || result;
820     }
821 
822     // Linux C++ exception handling information can also provide
823     // unwinding data.
824     const Shdr* eh_frame_section =
825         FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
826                                        sections, names, names_end,
827                                        elf_header->e_shnum);
828     if (eh_frame_section) {
829       // Pointers in .eh_frame data may be relative to the base addresses of
830       // certain sections. Provide those sections if present.
831       const Shdr* got_section =
832           FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
833                                          sections, names, names_end,
834                                          elf_header->e_shnum);
835       const Shdr* text_section =
836           FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
837                                          sections, names, names_end,
838                                          elf_header->e_shnum);
839       info->LoadedSection(".eh_frame");
840       // As above, ignore the return value of this function.
841       bool result =
842           LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
843                                  eh_frame_section, true,
844                                  got_section, text_section, big_endian, module);
845       found_usable_info = found_usable_info || result;
846     }
847   }
848 
849   if (!found_debug_info_section) {
850     fprintf(stderr, "%s: file contains no debugging information"
851             " (no \".stab\" or \".debug_info\" sections)\n",
852             obj_file.c_str());
853 
854     // Failed, but maybe there's a .gnu_debuglink section?
855     if (read_gnu_debug_link) {
856       const Shdr* gnu_debuglink_section
857           = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS,
858                                            sections, names,
859                                            names_end, elf_header->e_shnum);
860       if (gnu_debuglink_section) {
861         if (!info->debug_dirs().empty()) {
862           const uint8_t *debuglink_contents =
863               GetOffset<ElfClass, uint8_t>(elf_header,
864                                            gnu_debuglink_section->sh_offset);
865           string debuglink_file =
866               ReadDebugLink(debuglink_contents,
867                             gnu_debuglink_section->sh_size,
868                             big_endian,
869                             obj_file,
870                             info->debug_dirs());
871           info->set_debuglink_file(debuglink_file);
872         } else {
873           fprintf(stderr, ".gnu_debuglink section found in '%s', "
874                   "but no debug path specified.\n", obj_file.c_str());
875         }
876       } else {
877         fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
878                 obj_file.c_str());
879       }
880     } else {
881       // Return true if some usable information was found, since the caller
882       // doesn't want to use .gnu_debuglink.
883       return found_usable_info;
884     }
885 
886     // No debug info was found, let the user try again with .gnu_debuglink
887     // if present.
888     return false;
889   }
890 
891   return true;
892 }
893 
// Translate the e_machine field of ELF_HEADER into the architecture name
// used in Breakpad symbol files. Returns NULL when the machine type is not
// one of the entries in the table below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  // Each entry pairs an ELF machine constant with its Breakpad name.
  static const struct {
    Half machine;
    const char* breakpad_name;
  } kKnownMachines[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_AARCH64, "arm64" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
  };
  const size_t kKnownMachineCount =
      sizeof(kKnownMachines) / sizeof(kKnownMachines[0]);

  const Half machine = elf_header->e_machine;
  for (size_t i = 0; i < kKnownMachineCount; ++i) {
    if (kKnownMachines[i].machine == machine)
      return kKnownMachines[i].breakpad_name;
  }

  // Not an architecture we know how to name.
  return NULL;
}
914 
915 template<typename ElfClass>
SanitizeDebugFile(const typename ElfClass::Ehdr * debug_elf_header,const string & debuglink_file,const string & obj_filename,const char * obj_file_architecture,const bool obj_file_is_big_endian)916 bool SanitizeDebugFile(const typename ElfClass::Ehdr* debug_elf_header,
917                        const string& debuglink_file,
918                        const string& obj_filename,
919                        const char* obj_file_architecture,
920                        const bool obj_file_is_big_endian) {
921   const char* debug_architecture =
922       ElfArchitecture<ElfClass>(debug_elf_header);
923   if (!debug_architecture) {
924     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
925             debuglink_file.c_str(), debug_elf_header->e_machine);
926     return false;
927   }
928   if (strcmp(obj_file_architecture, debug_architecture)) {
929     fprintf(stderr, "%s with ELF machine architecture %s does not match "
930             "%s with ELF architecture %s\n",
931             debuglink_file.c_str(), debug_architecture,
932             obj_filename.c_str(), obj_file_architecture);
933     return false;
934   }
935   bool debug_big_endian;
936   if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
937     return false;
938   if (debug_big_endian != obj_file_is_big_endian) {
939     fprintf(stderr, "%s and %s does not match in endianness\n",
940             obj_filename.c_str(), debuglink_file.c_str());
941     return false;
942   }
943   return true;
944 }
945 
946 template<typename ElfClass>
InitModuleForElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const string & obj_os,scoped_ptr<Module> & module)947 bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header,
948                            const string& obj_filename,
949                            const string& obj_os,
950                            scoped_ptr<Module>& module) {
951   PageAllocator allocator;
952   wasteful_vector<uint8_t> identifier(&allocator, kDefaultBuildIdSize);
953   if (!FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
954     fprintf(stderr, "%s: unable to generate file identifier\n",
955             obj_filename.c_str());
956     return false;
957   }
958 
959   const char *architecture = ElfArchitecture<ElfClass>(elf_header);
960   if (!architecture) {
961     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
962             obj_filename.c_str(), elf_header->e_machine);
963     return false;
964   }
965 
966   char name_buf[NAME_MAX] = {};
967   std::string name = google_breakpad::ElfFileSoNameFromMappedFile(
968                          elf_header, name_buf, sizeof(name_buf))
969                          ? name_buf
970                          : google_breakpad::BaseName(obj_filename);
971 
972   // Add an extra "0" at the end.  PDB files on Windows have an 'age'
973   // number appended to the end of the file identifier; this isn't
974   // really used or necessary on other platforms, but be consistent.
975   string id = FileID::ConvertIdentifierToUUIDString(identifier) + "0";
976   // This is just the raw Build ID in hex.
977   string code_id = FileID::ConvertIdentifierToString(identifier);
978 
979   module.reset(new Module(name, obj_os, architecture, id, code_id));
980 
981   return true;
982 }
983 
984 template<typename ElfClass>
ReadSymbolDataElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** out_module)985 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
986                             const string& obj_filename,
987                             const string& obj_os,
988                             const std::vector<string>& debug_dirs,
989                             const DumpOptions& options,
990                             Module** out_module) {
991   typedef typename ElfClass::Ehdr Ehdr;
992 
993   *out_module = NULL;
994 
995   scoped_ptr<Module> module;
996   if (!InitModuleForElfClass<ElfClass>(elf_header, obj_filename, obj_os,
997                                        module)) {
998     return false;
999   }
1000 
1001   // Figure out what endianness this file is.
1002   bool big_endian;
1003   if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
1004     return false;
1005 
1006   LoadSymbolsInfo<ElfClass> info(debug_dirs);
1007   if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
1008                              !debug_dirs.empty(), &info,
1009                              options, module.get())) {
1010     const string debuglink_file = info.debuglink_file();
1011     if (debuglink_file.empty())
1012       return false;
1013 
1014     // Load debuglink ELF file.
1015     fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
1016     MmapWrapper debug_map_wrapper;
1017     Ehdr* debug_elf_header = NULL;
1018     if (!LoadELF(debuglink_file, &debug_map_wrapper,
1019                  reinterpret_cast<void**>(&debug_elf_header)) ||
1020         !SanitizeDebugFile<ElfClass>(debug_elf_header, debuglink_file,
1021                                      obj_filename,
1022                                      module->architecture().c_str(),
1023                                      big_endian)) {
1024       return false;
1025     }
1026 
1027     if (!LoadSymbols<ElfClass>(debuglink_file, big_endian,
1028                                debug_elf_header, false, &info,
1029                                options, module.get())) {
1030       return false;
1031     }
1032   }
1033 
1034   *out_module = module.release();
1035   return true;
1036 }
1037 
1038 }  // namespace
1039 
1040 namespace google_breakpad {
1041 
1042 // Not explicitly exported, but not static so it can be used in unit tests.
ReadSymbolDataInternal(const uint8_t * obj_file,const string & obj_filename,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)1043 bool ReadSymbolDataInternal(const uint8_t* obj_file,
1044                             const string& obj_filename,
1045                             const string& obj_os,
1046                             const std::vector<string>& debug_dirs,
1047                             const DumpOptions& options,
1048                             Module** module) {
1049   if (!IsValidElf(obj_file)) {
1050     fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
1051     return false;
1052   }
1053 
1054   int elfclass = ElfClass(obj_file);
1055   if (elfclass == ELFCLASS32) {
1056     return ReadSymbolDataElfClass<ElfClass32>(
1057         reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, obj_os,
1058         debug_dirs, options, module);
1059   }
1060   if (elfclass == ELFCLASS64) {
1061     return ReadSymbolDataElfClass<ElfClass64>(
1062         reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, obj_os,
1063         debug_dirs, options, module);
1064   }
1065 
1066   return false;
1067 }
1068 
WriteSymbolFile(const string & load_path,const string & obj_file,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,std::ostream & sym_stream)1069 bool WriteSymbolFile(const string &load_path,
1070                      const string &obj_file,
1071                      const string &obj_os,
1072                      const std::vector<string>& debug_dirs,
1073                      const DumpOptions& options,
1074                      std::ostream &sym_stream) {
1075   Module* module;
1076   if (!ReadSymbolData(load_path, obj_file, obj_os, debug_dirs, options,
1077                       &module))
1078     return false;
1079 
1080   bool result = module->Write(sym_stream, options.symbol_data);
1081   delete module;
1082   return result;
1083 }
1084 
1085 // Read the selected object file's debugging information, and write out the
1086 // header only to |stream|. Return true on success; if an error occurs, report
1087 // it and return false.
WriteSymbolFileHeader(const string & load_path,const string & obj_file,const string & obj_os,std::ostream & sym_stream)1088 bool WriteSymbolFileHeader(const string& load_path,
1089                            const string& obj_file,
1090                            const string& obj_os,
1091                            std::ostream &sym_stream) {
1092   MmapWrapper map_wrapper;
1093   void* elf_header = NULL;
1094   if (!LoadELF(load_path, &map_wrapper, &elf_header)) {
1095     fprintf(stderr, "Could not load ELF file: %s\n", obj_file.c_str());
1096     return false;
1097   }
1098 
1099   if (!IsValidElf(elf_header)) {
1100     fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
1101     return false;
1102   }
1103 
1104   int elfclass = ElfClass(elf_header);
1105   scoped_ptr<Module> module;
1106   if (elfclass == ELFCLASS32) {
1107     if (!InitModuleForElfClass<ElfClass32>(
1108         reinterpret_cast<const Elf32_Ehdr*>(elf_header), obj_file, obj_os,
1109         module)) {
1110       fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1111       return false;
1112     }
1113   } else if (elfclass == ELFCLASS64) {
1114     if (!InitModuleForElfClass<ElfClass64>(
1115         reinterpret_cast<const Elf64_Ehdr*>(elf_header), obj_file, obj_os,
1116         module)) {
1117       fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1118       return false;
1119     }
1120   } else {
1121     fprintf(stderr, "Unsupported module file: %s\n", obj_file.c_str());
1122     return false;
1123   }
1124 
1125   return module->Write(sym_stream, ALL_SYMBOL_DATA);
1126 }
1127 
ReadSymbolData(const string & load_path,const string & obj_file,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)1128 bool ReadSymbolData(const string& load_path,
1129                     const string& obj_file,
1130                     const string& obj_os,
1131                     const std::vector<string>& debug_dirs,
1132                     const DumpOptions& options,
1133                     Module** module) {
1134   MmapWrapper map_wrapper;
1135   void* elf_header = NULL;
1136   if (!LoadELF(load_path, &map_wrapper, &elf_header))
1137     return false;
1138 
1139   return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
1140                                 obj_file, obj_os, debug_dirs, options, module);
1141 }
1142 
1143 }  // namespace google_breakpad
1144