1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 
4 // Copyright (c) 2006, 2011, 2012 Google Inc.
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are
9 // met:
10 //
11 //     * Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 //     * Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 //     * Neither the name of Google Inc. nor the names of its
18 // contributors may be used to endorse or promote products derived from
19 // this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
34 
35 // (derived from)
36 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
37 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
38 //
39 // dump_symbols.h: Read debugging information from an ELF file, and write
40 // it out as a Breakpad symbol file.
41 
42 // This file is derived from the following files in
43 // toolkit/crashreporter/google-breakpad:
44 //   src/common/linux/dump_symbols.cc
45 //   src/common/linux/elfutils.cc
46 //   src/common/linux/file_id.cc
47 
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <libgen.h>
51 #include <stdio.h>
52 #include <string.h>
53 #include <sys/mman.h>
54 #include <sys/stat.h>
55 #include <unistd.h>
56 #include <arpa/inet.h>
57 
58 #include <set>
59 #include <string>
60 #include <vector>
61 
62 #include "mozilla/Assertions.h"
63 #include "mozilla/Sprintf.h"
64 
65 #include "PlatformMacros.h"
66 #include "LulCommonExt.h"
67 #include "LulDwarfExt.h"
68 #include "LulElfInt.h"
69 #include "LulMainInt.h"
70 
71 #if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
// bionic and older glibc don't define it
73 #  define SHT_ARM_EXIDX (SHT_LOPROC + 1)
74 #endif
75 
76 #if (defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)) && \
77     !defined(SHT_X86_64_UNWIND)
78 // This is sometimes necessary on x86_64-android and x86_64-linux.
79 #  define SHT_X86_64_UNWIND 0x70000001
80 #endif
81 
82 // Old Linux header doesn't define EM_AARCH64
83 #ifndef EM_AARCH64
84 #  define EM_AARCH64 183
85 #endif
86 
87 // This namespace contains helper functions.
88 namespace {
89 
90 using lul::DwarfCFIToModule;
91 using lul::FindElfSectionByName;
92 using lul::GetOffset;
93 using lul::IsValidElf;
94 using lul::Module;
95 using lul::scoped_ptr;
96 using lul::Summariser;
97 using lul::UniqueStringUniverse;
98 using std::set;
99 using std::string;
100 using std::vector;
101 
//
// FDWrapper
//
// Owns a file descriptor and closes it when the wrapper is destroyed,
// unless ownership has been given up with release() first.  A value of
// -1 means "no descriptor held".
//
class FDWrapper {
 public:
  // Takes ownership of |fd|.
  explicit FDWrapper(int fd) : fd_(fd) {}

  ~FDWrapper() {
    if (fd_ != -1) close(fd_);
  }

  // Copying is disabled: two wrappers holding the same descriptor would
  // both close it, and the second close() could hit an unrelated
  // descriptor that reused the number in the meantime.
  FDWrapper(const FDWrapper&) = delete;
  FDWrapper& operator=(const FDWrapper&) = delete;

  // Returns the held descriptor without giving up ownership.
  int get() const { return fd_; }

  // Returns the held descriptor and stops tracking it; the caller is
  // then responsible for closing it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  int fd_;
};
123 
124 //
125 // MmapWrapper
126 //
127 // Wrapper class to make sure mapped regions are unmapped.
128 //
129 class MmapWrapper {
130  public:
MmapWrapper()131   MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}
~MmapWrapper()132   ~MmapWrapper() {
133     if (is_set_ && base_ != NULL) {
134       MOZ_ASSERT(size_ > 0);
135       munmap(base_, size_);
136     }
137   }
set(void * mapped_address,size_t mapped_size)138   void set(void* mapped_address, size_t mapped_size) {
139     is_set_ = true;
140     base_ = mapped_address;
141     size_ = mapped_size;
142   }
release()143   void release() {
144     MOZ_ASSERT(is_set_);
145     is_set_ = false;
146     base_ = NULL;
147     size_ = 0;
148   }
149 
150  private:
151   bool is_set_;
152   void* base_;
153   size_t size_;
154 };
155 
156 // Set NUM_DW_REGNAMES to be the number of Dwarf register names
157 // appropriate to the machine architecture given in HEADER.  Return
158 // true on success, or false if HEADER's machine architecture is not
159 // supported.
160 template <typename ElfClass>
DwarfCFIRegisterNames(const typename ElfClass::Ehdr * elf_header,unsigned int * num_dw_regnames)161 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
162                            unsigned int* num_dw_regnames) {
163   switch (elf_header->e_machine) {
164     case EM_386:
165       *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386();
166       return true;
167     case EM_ARM:
168       *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
169       return true;
170     case EM_X86_64:
171       *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
172       return true;
173     case EM_MIPS:
174       *num_dw_regnames = DwarfCFIToModule::RegisterNames::MIPS();
175       return true;
176     case EM_AARCH64:
177       *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM64();
178       return true;
179     default:
180       MOZ_ASSERT(0);
181       return false;
182   }
183 }
184 
185 template <typename ElfClass>
LoadDwarfCFI(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const char * section_name,const typename ElfClass::Shdr * section,const bool eh_frame,const typename ElfClass::Shdr * got_section,const typename ElfClass::Shdr * text_section,const bool big_endian,SecMap * smap,uintptr_t text_bias,UniqueStringUniverse * usu,void (* log)(const char *))186 bool LoadDwarfCFI(const string& dwarf_filename,
187                   const typename ElfClass::Ehdr* elf_header,
188                   const char* section_name,
189                   const typename ElfClass::Shdr* section, const bool eh_frame,
190                   const typename ElfClass::Shdr* got_section,
191                   const typename ElfClass::Shdr* text_section,
192                   const bool big_endian, SecMap* smap, uintptr_t text_bias,
193                   UniqueStringUniverse* usu, void (*log)(const char*)) {
194   // Find the appropriate set of register names for this file's
195   // architecture.
196   unsigned int num_dw_regs = 0;
197   if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) {
198     fprintf(stderr,
199             "%s: unrecognized ELF machine architecture '%d';"
200             " cannot convert DWARF call frame information\n",
201             dwarf_filename.c_str(), elf_header->e_machine);
202     return false;
203   }
204 
205   const lul::Endianness endianness =
206       big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE;
207 
208   // Find the call frame information and its size.
209   const char* cfi = GetOffset<ElfClass, char>(elf_header, section->sh_offset);
210   size_t cfi_size = section->sh_size;
211 
212   // Plug together the parser, handler, and their entourages.
213 
214   // Here's a summariser, which will receive the output of the
215   // parser, create summaries, and add them to |smap|.
216   Summariser summ(smap, text_bias, log);
217 
218   lul::ByteReader reader(endianness);
219   reader.SetAddressSize(ElfClass::kAddrSize);
220 
221   DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
222   DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ);
223 
224   // Provide the base addresses for .eh_frame encoded pointers, if
225   // possible.
226   reader.SetCFIDataBase(section->sh_addr, cfi);
227   if (got_section) reader.SetDataBase(got_section->sh_addr);
228   if (text_section) reader.SetTextBase(text_section->sh_addr);
229 
230   lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
231                                               section_name);
232   lul::CallFrameInfo parser(cfi, cfi_size, &reader, &handler, &dwarf_reporter,
233                             eh_frame);
234   parser.Start();
235 
236   return true;
237 }
238 
LoadELF(const string & obj_file,MmapWrapper * map_wrapper,void ** elf_header)239 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
240              void** elf_header) {
241   int obj_fd = open(obj_file.c_str(), O_RDONLY);
242   if (obj_fd < 0) {
243     fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(),
244             strerror(errno));
245     return false;
246   }
247   FDWrapper obj_fd_wrapper(obj_fd);
248   struct stat st;
249   if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
250     fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(),
251             strerror(errno));
252     return false;
253   }
254   // Mapping it read-only is good enough.  In any case, mapping it
255   // read-write confuses Valgrind's debuginfo acquire/discard
256   // heuristics, making it hard to profile the profiler.
257   void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0);
258   if (obj_base == MAP_FAILED) {
259     fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(),
260             strerror(errno));
261     return false;
262   }
263   map_wrapper->set(obj_base, st.st_size);
264   *elf_header = obj_base;
265   if (!IsValidElf(*elf_header)) {
266     fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
267     return false;
268   }
269   return true;
270 }
271 
// Report the byte order declared in ELF_HEADER's e_ident[EI_DATA].
// Sets *BIG_ENDIAN and returns true for the two known encodings; for
// anything else prints a diagnostic to stderr, leaves *BIG_ENDIAN
// untouched and returns false.
template <typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  switch (elf_header->e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n",
          elf_header->e_ident[EI_DATA]);
  return false;
}
289 
//
// LoadSymbolsInfo
//
// State shared between the two LoadSymbols() calls that can happen for
// one object: the first on the object itself and, when needed, a second
// on a separate debug file.
//
template <typename ElfClass>
class LoadSymbolsInfo {
 public:
  using Addr = typename ElfClass::Addr;

  // DBG_DIRS is kept by reference, so it must outlive this object.
  explicit LoadSymbolsInfo(const vector<string>& dbg_dirs)
      : debug_dirs_(dbg_dirs), has_loading_addr_(false) {}

  // Record that SECTION has been loaded.  If it had already been
  // recorded, complain on stderr instead; this is how loading the same
  // section from two different files gets noticed.
  void LoadedSection(const string& section) {
    const bool newly_recorded = loaded_sections_.insert(section).second;
    if (!newly_recorded) {
      fprintf(stderr, "Section %s has already been loaded.\n", section.c_str());
    }
  }

  // Path of the debug ELF file.  Nothing in this class assigns it, so
  // it is empty unless set elsewhere.
  string debuglink_file() const { return debuglink_file_; }

 private:
  // Directories in which to search for the debug ELF file.
  const vector<string>& debug_dirs_;

  // Full path to the debug ELF file.
  string debuglink_file_;

  // Initialised to false and not otherwise used within this class.
  bool has_loading_addr_;

  // Names of the ELF sections loaded so far, across calls to
  // LoadSymbols().
  set<string> loaded_sections_;
};
328 
// Return the virtual address (p_vaddr) of the first PT_LOAD entry among
// the NHEADER program headers starting at PROGRAM_HEADERS, or 0 when
// there is no such entry.
//
// For non-PIC executables (e_type == ET_EXEC) this is the preferred
// load address, since ELF requires the segments to be sorted by load
// address.  For PIC executables and dynamic libraries (e_type ==
// ET_DYN) the value will normally be zero.
template <typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers, int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  const Phdr* segment = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++segment) {
    if (segment->p_type == PT_LOAD) {
      return segment->p_vaddr;
    }
  }
  return 0;
}
346 
347 template <typename ElfClass>
LoadSymbols(const string & obj_file,const bool big_endian,const typename ElfClass::Ehdr * elf_header,const bool read_gnu_debug_link,LoadSymbolsInfo<ElfClass> * info,SecMap * smap,void * rx_avma,size_t rx_size,UniqueStringUniverse * usu,void (* log)(const char *))348 bool LoadSymbols(const string& obj_file, const bool big_endian,
349                  const typename ElfClass::Ehdr* elf_header,
350                  const bool read_gnu_debug_link,
351                  LoadSymbolsInfo<ElfClass>* info, SecMap* smap, void* rx_avma,
352                  size_t rx_size, UniqueStringUniverse* usu,
353                  void (*log)(const char*)) {
354   typedef typename ElfClass::Phdr Phdr;
355   typedef typename ElfClass::Shdr Shdr;
356 
357   char buf[500];
358   SprintfLiteral(buf, "LoadSymbols: BEGIN   %s\n", obj_file.c_str());
359   buf[sizeof(buf) - 1] = 0;
360   log(buf);
361 
362   // This is how the text bias is calculated.
363   // BEGIN CALCULATE BIAS
364   uintptr_t loading_addr = GetLoadingAddress<ElfClass>(
365       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
366       elf_header->e_phnum);
367   uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr;
368   SprintfLiteral(buf, "LoadSymbols:   rx_avma=%llx, text_bias=%llx",
369                  (unsigned long long int)(uintptr_t)rx_avma,
370                  (unsigned long long int)text_bias);
371   buf[sizeof(buf) - 1] = 0;
372   log(buf);
373   // END CALCULATE BIAS
374 
375   const Shdr* sections =
376       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
377   const Shdr* section_names = sections + elf_header->e_shstrndx;
378   const char* names =
379       GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
380   const char* names_end = names + section_names->sh_size;
381   bool found_usable_info = false;
382 
383   // Dwarf Call Frame Information (CFI) is actually independent from
384   // the other DWARF debugging information, and can be used alone.
385   const Shdr* dwarf_cfi_section =
386       FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, sections,
387                                      names, names_end, elf_header->e_shnum);
388   if (dwarf_cfi_section) {
389     // Ignore the return value of this function; even without call frame
390     // information, the other debugging information could be perfectly
391     // useful.
392     info->LoadedSection(".debug_frame");
393     bool result = LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
394                                          dwarf_cfi_section, false, 0, 0,
395                                          big_endian, smap, text_bias, usu, log);
396     found_usable_info = found_usable_info || result;
397     if (result) log("LoadSymbols:   read CFI from .debug_frame");
398   }
399 
400   // Linux C++ exception handling information can also provide
401   // unwinding data.
402   const Shdr* eh_frame_section =
403       FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, sections, names,
404                                      names_end, elf_header->e_shnum);
405 #if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
406   if (!eh_frame_section) {
407     // Possibly depending on which linker created libxul.so, on x86_64-linux
408     // and -android, .eh_frame may instead have the SHT_X86_64_UNWIND type.
409     eh_frame_section =
410         FindElfSectionByName<ElfClass>(".eh_frame", SHT_X86_64_UNWIND, sections,
411                                        names, names_end, elf_header->e_shnum);
412   }
413 #endif
414   if (eh_frame_section) {
415     // Pointers in .eh_frame data may be relative to the base addresses of
416     // certain sections. Provide those sections if present.
417     const Shdr* got_section = FindElfSectionByName<ElfClass>(
418         ".got", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum);
419     const Shdr* text_section = FindElfSectionByName<ElfClass>(
420         ".text", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum);
421     info->LoadedSection(".eh_frame");
422     // As above, ignore the return value of this function.
423     bool result = LoadDwarfCFI<ElfClass>(
424         obj_file, elf_header, ".eh_frame", eh_frame_section, true, got_section,
425         text_section, big_endian, smap, text_bias, usu, log);
426     found_usable_info = found_usable_info || result;
427     if (result) log("LoadSymbols:   read CFI from .eh_frame");
428   }
429 
430   SprintfLiteral(buf, "LoadSymbols: END     %s\n", obj_file.c_str());
431   buf[sizeof(buf) - 1] = 0;
432   log(buf);
433 
434   return found_usable_info;
435 }
436 
// Map ELF_HEADER's e_machine value to the architecture name used in
// breakpad symbol files.  Returns NULL for machine types that are not
// in the table below.
template <typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  struct ArchName {
    Half machine;
    const char* name;
  };
  static const ArchName kKnownArchitectures[] = {
      {EM_386, "x86"},         {EM_ARM, "arm"},     {EM_AARCH64, "arm64"},
      {EM_MIPS, "mips"},       {EM_PPC64, "ppc64"}, {EM_PPC, "ppc"},
      {EM_S390, "s390"},       {EM_SPARC, "sparc"}, {EM_SPARCV9, "sparcv9"},
      {EM_X86_64, "x86_64"},
  };

  const Half machine = elf_header->e_machine;
  for (const ArchName& entry : kKnownArchitectures) {
    if (entry.machine == machine) {
      return entry.name;
    }
  }
  return NULL;
}
468 
469 // Format the Elf file identifier in IDENTIFIER as a UUID with the
470 // dashes removed.
FormatIdentifier(unsigned char identifier[16])471 string FormatIdentifier(unsigned char identifier[16]) {
472   char identifier_str[40];
473   lul::FileID::ConvertIdentifierToString(identifier, identifier_str,
474                                          sizeof(identifier_str));
475   string id_no_dash;
476   for (int i = 0; identifier_str[i] != '\0'; ++i)
477     if (identifier_str[i] != '-') id_no_dash += identifier_str[i];
478   // Add an extra "0" by the end.  PDB files on Windows have an 'age'
479   // number appended to the end of the file identifier; this isn't
480   // really used or necessary on other platforms, but be consistent.
481   id_no_dash += '0';
482   return id_no_dash;
483 }
484 
// Return the non-directory portion of FILENAME as computed by
// basename(): normally the part after the last slash, or the whole
// filename if there are no slashes.
string BaseFileName(const string& filename) {
  // basename() may modify the buffer it is given, so hand it a private
  // scratch copy.  Holding the copy in a std::string (rather than a
  // strdup()ed buffer) means there is no unchecked allocation result and
  // nothing to leak if building the return value throws.
  string scratch(filename.c_str());
  // &scratch[0] is a writable, NUL-terminated buffer even when the
  // string is empty.  The result is copied out before |scratch| dies.
  return string(basename(&scratch[0]));
}
494 
495 template <typename ElfClass>
ReadSymbolDataElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const vector<string> & debug_dirs,SecMap * smap,void * rx_avma,size_t rx_size,UniqueStringUniverse * usu,void (* log)(const char *))496 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
497                             const string& obj_filename,
498                             const vector<string>& debug_dirs, SecMap* smap,
499                             void* rx_avma, size_t rx_size,
500                             UniqueStringUniverse* usu,
501                             void (*log)(const char*)) {
502   typedef typename ElfClass::Ehdr Ehdr;
503 
504   unsigned char identifier[16];
505   if (!lul ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
506     fprintf(stderr, "%s: unable to generate file identifier\n",
507             obj_filename.c_str());
508     return false;
509   }
510 
511   const char* architecture = ElfArchitecture<ElfClass>(elf_header);
512   if (!architecture) {
513     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
514             obj_filename.c_str(), elf_header->e_machine);
515     return false;
516   }
517 
518   // Figure out what endianness this file is.
519   bool big_endian;
520   if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) return false;
521 
522   string name = BaseFileName(obj_filename);
523   string os = "Linux";
524   string id = FormatIdentifier(identifier);
525 
526   LoadSymbolsInfo<ElfClass> info(debug_dirs);
527   if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
528                              !debug_dirs.empty(), &info, smap, rx_avma, rx_size,
529                              usu, log)) {
530     const string debuglink_file = info.debuglink_file();
531     if (debuglink_file.empty()) return false;
532 
533     // Load debuglink ELF file.
534     fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
535     MmapWrapper debug_map_wrapper;
536     Ehdr* debug_elf_header = NULL;
537     if (!LoadELF(debuglink_file, &debug_map_wrapper,
538                  reinterpret_cast<void**>(&debug_elf_header)))
539       return false;
540     // Sanity checks to make sure everything matches up.
541     const char* debug_architecture =
542         ElfArchitecture<ElfClass>(debug_elf_header);
543     if (!debug_architecture) {
544       fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
545               debuglink_file.c_str(), debug_elf_header->e_machine);
546       return false;
547     }
548     if (strcmp(architecture, debug_architecture)) {
549       fprintf(stderr,
550               "%s with ELF machine architecture %s does not match "
551               "%s with ELF architecture %s\n",
552               debuglink_file.c_str(), debug_architecture, obj_filename.c_str(),
553               architecture);
554       return false;
555     }
556 
557     bool debug_big_endian;
558     if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
559       return false;
560     if (debug_big_endian != big_endian) {
561       fprintf(stderr, "%s and %s does not match in endianness\n",
562               obj_filename.c_str(), debuglink_file.c_str());
563       return false;
564     }
565 
566     if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
567                                debug_elf_header, false, &info, smap, rx_avma,
568                                rx_size, usu, log)) {
569       return false;
570     }
571   }
572 
573   return true;
574 }
575 
576 }  // namespace
577 
578 namespace lul {
579 
ReadSymbolDataInternal(const uint8_t * obj_file,const string & obj_filename,const vector<string> & debug_dirs,SecMap * smap,void * rx_avma,size_t rx_size,UniqueStringUniverse * usu,void (* log)(const char *))580 bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename,
581                             const vector<string>& debug_dirs, SecMap* smap,
582                             void* rx_avma, size_t rx_size,
583                             UniqueStringUniverse* usu,
584                             void (*log)(const char*)) {
585   if (!IsValidElf(obj_file)) {
586     fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
587     return false;
588   }
589 
590   int elfclass = ElfClass(obj_file);
591   if (elfclass == ELFCLASS32) {
592     return ReadSymbolDataElfClass<ElfClass32>(
593         reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs,
594         smap, rx_avma, rx_size, usu, log);
595   }
596   if (elfclass == ELFCLASS64) {
597     return ReadSymbolDataElfClass<ElfClass64>(
598         reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs,
599         smap, rx_avma, rx_size, usu, log);
600   }
601 
602   return false;
603 }
604 
ReadSymbolData(const string & obj_file,const vector<string> & debug_dirs,SecMap * smap,void * rx_avma,size_t rx_size,UniqueStringUniverse * usu,void (* log)(const char *))605 bool ReadSymbolData(const string& obj_file, const vector<string>& debug_dirs,
606                     SecMap* smap, void* rx_avma, size_t rx_size,
607                     UniqueStringUniverse* usu, void (*log)(const char*)) {
608   MmapWrapper map_wrapper;
609   void* elf_header = NULL;
610   if (!LoadELF(obj_file, &map_wrapper, &elf_header)) return false;
611 
612   return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
613                                 obj_file, debug_dirs, smap, rx_avma, rx_size,
614                                 usu, log);
615 }
616 
617 namespace {
618 
619 template <typename ElfClass>
FindElfClassSection(const char * elf_base,const char * section_name,typename ElfClass::Word section_type,const void ** section_start,int * section_size)620 void FindElfClassSection(const char* elf_base, const char* section_name,
621                          typename ElfClass::Word section_type,
622                          const void** section_start, int* section_size) {
623   typedef typename ElfClass::Ehdr Ehdr;
624   typedef typename ElfClass::Shdr Shdr;
625 
626   MOZ_ASSERT(elf_base);
627   MOZ_ASSERT(section_start);
628   MOZ_ASSERT(section_size);
629 
630   MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
631 
632   const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
633   MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
634 
635   const Shdr* sections =
636       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
637   const Shdr* section_names = sections + elf_header->e_shstrndx;
638   const char* names =
639       GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
640   const char* names_end = names + section_names->sh_size;
641 
642   const Shdr* section =
643       FindElfSectionByName<ElfClass>(section_name, section_type, sections,
644                                      names, names_end, elf_header->e_shnum);
645 
646   if (section != NULL && section->sh_size > 0) {
647     *section_start = elf_base + section->sh_offset;
648     *section_size = section->sh_size;
649   }
650 }
651 
652 template <typename ElfClass>
FindElfClassSegment(const char * elf_base,typename ElfClass::Word segment_type,const void ** segment_start,int * segment_size)653 void FindElfClassSegment(const char* elf_base,
654                          typename ElfClass::Word segment_type,
655                          const void** segment_start, int* segment_size) {
656   typedef typename ElfClass::Ehdr Ehdr;
657   typedef typename ElfClass::Phdr Phdr;
658 
659   MOZ_ASSERT(elf_base);
660   MOZ_ASSERT(segment_start);
661   MOZ_ASSERT(segment_size);
662 
663   MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
664 
665   const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
666   MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
667 
668   const Phdr* phdrs =
669       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
670 
671   for (int i = 0; i < elf_header->e_phnum; ++i) {
672     if (phdrs[i].p_type == segment_type) {
673       *segment_start = elf_base + phdrs[i].p_offset;
674       *segment_size = phdrs[i].p_filesz;
675       return;
676     }
677   }
678 }
679 
680 }  // namespace
681 
// Returns true if the memory at ELF_BASE begins with the ELF magic
// string ELFMAG (compared over SELFMAG characters).
bool IsValidElf(const void* elf_base) {
  const char* const magic = static_cast<const char*>(elf_base);
  if (strncmp(magic, ELFMAG, SELFMAG) != 0) {
    return false;
  }
  return true;
}
685 
// Returns the EI_CLASS byte of the identification array of the ELF
// header located at ELF_BASE.
int ElfClass(const void* elf_base) {
  typedef ElfW(Ehdr) NativeEhdr;
  const NativeEhdr& header = *static_cast<const NativeEhdr*>(elf_base);
  return header.e_ident[EI_CLASS];
}
691 
FindElfSection(const void * elf_mapped_base,const char * section_name,uint32_t section_type,const void ** section_start,int * section_size,int * elfclass)692 bool FindElfSection(const void* elf_mapped_base, const char* section_name,
693                     uint32_t section_type, const void** section_start,
694                     int* section_size, int* elfclass) {
695   MOZ_ASSERT(elf_mapped_base);
696   MOZ_ASSERT(section_start);
697   MOZ_ASSERT(section_size);
698 
699   *section_start = NULL;
700   *section_size = 0;
701 
702   if (!IsValidElf(elf_mapped_base)) return false;
703 
704   int cls = ElfClass(elf_mapped_base);
705   if (elfclass) {
706     *elfclass = cls;
707   }
708 
709   const char* elf_base = static_cast<const char*>(elf_mapped_base);
710 
711   if (cls == ELFCLASS32) {
712     FindElfClassSection<ElfClass32>(elf_base, section_name, section_type,
713                                     section_start, section_size);
714     return *section_start != NULL;
715   } else if (cls == ELFCLASS64) {
716     FindElfClassSection<ElfClass64>(elf_base, section_name, section_type,
717                                     section_start, section_size);
718     return *section_start != NULL;
719   }
720 
721   return false;
722 }
723 
FindElfSegment(const void * elf_mapped_base,uint32_t segment_type,const void ** segment_start,int * segment_size,int * elfclass)724 bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
725                     const void** segment_start, int* segment_size,
726                     int* elfclass) {
727   MOZ_ASSERT(elf_mapped_base);
728   MOZ_ASSERT(segment_start);
729   MOZ_ASSERT(segment_size);
730 
731   *segment_start = NULL;
732   *segment_size = 0;
733 
734   if (!IsValidElf(elf_mapped_base)) return false;
735 
736   int cls = ElfClass(elf_mapped_base);
737   if (elfclass) {
738     *elfclass = cls;
739   }
740 
741   const char* elf_base = static_cast<const char*>(elf_mapped_base);
742 
743   if (cls == ELFCLASS32) {
744     FindElfClassSegment<ElfClass32>(elf_base, segment_type, segment_start,
745                                     segment_size);
746     return *segment_start != NULL;
747   } else if (cls == ELFCLASS64) {
748     FindElfClassSegment<ElfClass64>(elf_base, segment_type, segment_start,
749                                     segment_size);
750     return *segment_start != NULL;
751   }
752 
753   return false;
754 }
755 
756 // (derived from)
757 // file_id.cc: Return a unique identifier for a file
758 //
759 // See file_id.h for documentation
760 //
761 
// ELF note name and desc fields are each padded to a 32-bit word
// boundary: round |a| up to the next multiple of 4.  The argument is
// parenthesised so that expressions using low-precedence operators
// (e.g. `x | y`) are rounded as a whole.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
764 
765 // These functions are also used inside the crashed process, so be safe
766 // and use the syscall/libc wrappers instead of direct syscalls or libc.
767 
768 template <typename ElfClass>
ElfClassBuildIDNoteIdentifier(const void * section,int length,uint8_t identifier[kMDGUIDSize])769 static bool ElfClassBuildIDNoteIdentifier(const void* section, int length,
770                                           uint8_t identifier[kMDGUIDSize]) {
771   typedef typename ElfClass::Nhdr Nhdr;
772 
773   const void* section_end = reinterpret_cast<const char*>(section) + length;
774   const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
775   while (reinterpret_cast<const void*>(note_header) < section_end) {
776     if (note_header->n_type == NT_GNU_BUILD_ID) break;
777     note_header = reinterpret_cast<const Nhdr*>(
778         reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
779         NOTE_PADDING(note_header->n_namesz) +
780         NOTE_PADDING(note_header->n_descsz));
781   }
782   if (reinterpret_cast<const void*>(note_header) >= section_end ||
783       note_header->n_descsz == 0) {
784     return false;
785   }
786 
787   const char* build_id = reinterpret_cast<const char*>(note_header) +
788                          sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
789   // Copy as many bits of the build ID as will fit
790   // into the GUID space.
791   memset(identifier, 0, kMDGUIDSize);
792   memcpy(identifier, build_id,
793          std::min(kMDGUIDSize, (size_t)note_header->n_descsz));
794 
795   return true;
796 }
797 
798 // Attempt to locate a .note.gnu.build-id section in an ELF binary
799 // and copy as many bytes of it as will fit into |identifier|.
FindElfBuildIDNote(const void * elf_mapped_base,uint8_t identifier[kMDGUIDSize])800 static bool FindElfBuildIDNote(const void* elf_mapped_base,
801                                uint8_t identifier[kMDGUIDSize]) {
802   void* note_section;
803   int note_size, elfclass;
804   if ((!FindElfSegment(elf_mapped_base, PT_NOTE, (const void**)&note_section,
805                        &note_size, &elfclass) ||
806        note_size == 0) &&
807       (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
808                        (const void**)&note_section, &note_size, &elfclass) ||
809        note_size == 0)) {
810     return false;
811   }
812 
813   if (elfclass == ELFCLASS32) {
814     return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size,
815                                                      identifier);
816   } else if (elfclass == ELFCLASS64) {
817     return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size,
818                                                      identifier);
819   }
820 
821   return false;
822 }
823 
824 // Attempt to locate the .text section of an ELF binary and generate
825 // a simple hash by XORing the first page worth of bytes into |identifier|.
HashElfTextSection(const void * elf_mapped_base,uint8_t identifier[kMDGUIDSize])826 static bool HashElfTextSection(const void* elf_mapped_base,
827                                uint8_t identifier[kMDGUIDSize]) {
828   void* text_section;
829   int text_size;
830   if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
831                       (const void**)&text_section, &text_size, NULL) ||
832       text_size == 0) {
833     return false;
834   }
835 
836   memset(identifier, 0, kMDGUIDSize);
837   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
838   const uint8_t* ptr_end = ptr + std::min(text_size, 4096);
839   while (ptr < ptr_end) {
840     for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i];
841     ptr += kMDGUIDSize;
842   }
843   return true;
844 }
845 
846 // static
ElfFileIdentifierFromMappedFile(const void * base,uint8_t identifier[kMDGUIDSize])847 bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
848                                              uint8_t identifier[kMDGUIDSize]) {
849   // Look for a build id note first.
850   if (FindElfBuildIDNote(base, identifier)) return true;
851 
852   // Fall back on hashing the first page of the text section.
853   return HashElfTextSection(base, identifier);
854 }
855 
856 // static
ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],char * buffer,int buffer_length)857 void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
858                                        char* buffer, int buffer_length) {
859   uint8_t identifier_swapped[kMDGUIDSize];
860 
861   // Endian-ness swap to match dump processor expectation.
862   memcpy(identifier_swapped, identifier, kMDGUIDSize);
863   uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
864   *data1 = htonl(*data1);
865   uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
866   *data2 = htons(*data2);
867   uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
868   *data3 = htons(*data3);
869 
870   int buffer_idx = 0;
871   for (unsigned int idx = 0;
872        (buffer_idx < buffer_length) && (idx < kMDGUIDSize); ++idx) {
873     int hi = (identifier_swapped[idx] >> 4) & 0x0F;
874     int lo = (identifier_swapped[idx]) & 0x0F;
875 
876     if (idx == 4 || idx == 6 || idx == 8 || idx == 10)
877       buffer[buffer_idx++] = '-';
878 
879     buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi;
880     buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo;
881   }
882 
883   // NULL terminate
884   buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0;
885 }
886 
887 }  // namespace lul
888