1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 
4 // Copyright (c) 2006, 2011, 2012 Google Inc.
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are
9 // met:
10 //
11 //     * Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 //     * Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 //     * Neither the name of Google Inc. nor the names of its
18 // contributors may be used to endorse or promote products derived from
19 // this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
34 
35 // (derived from)
36 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
37 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
38 //
39 // dump_symbols.h: Read debugging information from an ELF file, and write
40 // it out as a Breakpad symbol file.
41 
42 // This file is derived from the following files in
43 // toolkit/crashreporter/google-breakpad:
44 //   src/common/linux/dump_symbols.cc
45 //   src/common/linux/elfutils.cc
46 //   src/common/linux/file_id.cc
47 
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <libgen.h>
51 #include <stdio.h>
52 #include <string.h>
53 #include <sys/mman.h>
54 #include <sys/stat.h>
55 #include <unistd.h>
56 #include <arpa/inet.h>
57 
58 #include <cstdlib>
59 #include <set>
60 #include <string>
61 #include <vector>
62 
63 #include "mozilla/Assertions.h"
64 #include "mozilla/Sprintf.h"
65 
66 #include "PlatformMacros.h"
67 #include "LulCommonExt.h"
68 #include "LulDwarfExt.h"
69 #include "LulElfInt.h"
70 #include "LulMainInt.h"
71 
72 #if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
// bionic and older glibc don't define it
74 #  define SHT_ARM_EXIDX (SHT_LOPROC + 1)
75 #endif
76 
77 // Old Linux header doesn't define EM_AARCH64
78 #ifndef EM_AARCH64
79 #  define EM_AARCH64 183
80 #endif
81 
82 // This namespace contains helper functions.
83 namespace {
84 
85 using lul::DwarfCFIToModule;
86 using lul::FindElfSectionByName;
87 using lul::GetOffset;
88 using lul::IsValidElf;
89 using lul::Module;
90 using lul::scoped_ptr;
91 using lul::Summariser;
92 using lul::UniqueStringUniverse;
93 using std::set;
94 using std::string;
95 using std::vector;
96 
//
// FDWrapper
//
// Scope guard for a file descriptor: the descriptor handed to the
// constructor is close()d when the wrapper is destroyed, unless ownership
// has been given up with release().  The value -1 means "no descriptor".
//
class FDWrapper {
 public:
  // Takes ownership of |fd|.
  explicit FDWrapper(int fd) : fd_(fd) {}

  ~FDWrapper() {
    if (fd_ != -1) close(fd_);
  }

  // A copy would leave two wrappers closing the same descriptor, so copying
  // is disallowed.
  FDWrapper(const FDWrapper&) = delete;
  FDWrapper& operator=(const FDWrapper&) = delete;

  // Returns the wrapped descriptor; ownership stays with the wrapper.
  int get() const { return fd_; }

  // Hands the descriptor back to the caller and leaves the wrapper empty, so
  // the destructor will no longer close it.
  int release() {
    const int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  int fd_;  // Owned descriptor, or -1 when empty.
};
118 
119 //
120 // MmapWrapper
121 //
122 // Wrapper class to make sure mapped regions are unmapped.
123 //
124 class MmapWrapper {
125  public:
MmapWrapper()126   MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}
~MmapWrapper()127   ~MmapWrapper() {
128     if (is_set_ && base_ != NULL) {
129       MOZ_ASSERT(size_ > 0);
130       munmap(base_, size_);
131     }
132   }
set(void * mapped_address,size_t mapped_size)133   void set(void* mapped_address, size_t mapped_size) {
134     is_set_ = true;
135     base_ = mapped_address;
136     size_ = mapped_size;
137   }
release()138   void release() {
139     MOZ_ASSERT(is_set_);
140     is_set_ = false;
141     base_ = NULL;
142     size_ = 0;
143   }
144 
145  private:
146   bool is_set_;
147   void* base_;
148   size_t size_;
149 };
150 
151 // Set NUM_DW_REGNAMES to be the number of Dwarf register names
152 // appropriate to the machine architecture given in HEADER.  Return
153 // true on success, or false if HEADER's machine architecture is not
154 // supported.
155 template <typename ElfClass>
DwarfCFIRegisterNames(const typename ElfClass::Ehdr * elf_header,unsigned int * num_dw_regnames)156 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
157                            unsigned int* num_dw_regnames) {
158   switch (elf_header->e_machine) {
159     case EM_386:
160       *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386();
161       return true;
162     case EM_ARM:
163       *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
164       return true;
165     case EM_X86_64:
166       *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
167       return true;
168     case EM_MIPS:
169       *num_dw_regnames = DwarfCFIToModule::RegisterNames::MIPS();
170       return true;
171     case EM_AARCH64:
172       *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM64();
173       return true;
174     default:
175       MOZ_ASSERT(0);
176       return false;
177   }
178 }
179 
180 template <typename ElfClass>
LoadDwarfCFI(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const char * section_name,const typename ElfClass::Shdr * section,const bool eh_frame,const typename ElfClass::Shdr * got_section,const typename ElfClass::Shdr * text_section,const bool big_endian,SecMap * smap,uintptr_t text_bias,UniqueStringUniverse * usu,void (* log)(const char *))181 bool LoadDwarfCFI(const string& dwarf_filename,
182                   const typename ElfClass::Ehdr* elf_header,
183                   const char* section_name,
184                   const typename ElfClass::Shdr* section, const bool eh_frame,
185                   const typename ElfClass::Shdr* got_section,
186                   const typename ElfClass::Shdr* text_section,
187                   const bool big_endian, SecMap* smap, uintptr_t text_bias,
188                   UniqueStringUniverse* usu, void (*log)(const char*)) {
189   // Find the appropriate set of register names for this file's
190   // architecture.
191   unsigned int num_dw_regs = 0;
192   if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) {
193     fprintf(stderr,
194             "%s: unrecognized ELF machine architecture '%d';"
195             " cannot convert DWARF call frame information\n",
196             dwarf_filename.c_str(), elf_header->e_machine);
197     return false;
198   }
199 
200   const lul::Endianness endianness =
201       big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE;
202 
203   // Find the call frame information and its size.
204   const char* cfi = GetOffset<ElfClass, char>(elf_header, section->sh_offset);
205   size_t cfi_size = section->sh_size;
206 
207   // Plug together the parser, handler, and their entourages.
208 
209   // Here's a summariser, which will receive the output of the
210   // parser, create summaries, and add them to |smap|.
211   Summariser summ(smap, text_bias, log);
212 
213   lul::ByteReader reader(endianness);
214   reader.SetAddressSize(ElfClass::kAddrSize);
215 
216   DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
217   DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ);
218 
219   // Provide the base addresses for .eh_frame encoded pointers, if
220   // possible.
221   reader.SetCFIDataBase(section->sh_addr, cfi);
222   if (got_section) reader.SetDataBase(got_section->sh_addr);
223   if (text_section) reader.SetTextBase(text_section->sh_addr);
224 
225   lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
226                                               section_name);
227   lul::CallFrameInfo parser(cfi, cfi_size, &reader, &handler, &dwarf_reporter,
228                             eh_frame);
229   parser.Start();
230 
231   return true;
232 }
233 
LoadELF(const string & obj_file,MmapWrapper * map_wrapper,void ** elf_header)234 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
235              void** elf_header) {
236   int obj_fd = open(obj_file.c_str(), O_RDONLY);
237   if (obj_fd < 0) {
238     fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(),
239             strerror(errno));
240     return false;
241   }
242   FDWrapper obj_fd_wrapper(obj_fd);
243   struct stat st;
244   if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
245     fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(),
246             strerror(errno));
247     return false;
248   }
249   // Mapping it read-only is good enough.  In any case, mapping it
250   // read-write confuses Valgrind's debuginfo acquire/discard
251   // heuristics, making it hard to profile the profiler.
252   void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0);
253   if (obj_base == MAP_FAILED) {
254     fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(),
255             strerror(errno));
256     return false;
257   }
258   map_wrapper->set(obj_base, st.st_size);
259   *elf_header = obj_base;
260   if (!IsValidElf(*elf_header)) {
261     fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
262     return false;
263   }
264   return true;
265 }
266 
// Report the byte order recorded in ELF_HEADER's e_ident[EI_DATA] field.
// Sets *BIG_ENDIAN and returns true for ELFDATA2LSB / ELFDATA2MSB.  For any
// other encoding, prints a diagnostic, leaves *BIG_ENDIAN untouched and
// returns false.
template <typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  switch (elf_header->e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;

    case ELFDATA2MSB:
      *big_endian = true;
      return true;

    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n",
          elf_header->e_ident[EI_DATA]);
  return false;
}
284 
//
// LoadSymbolsInfo
//
// State shared between the two calls to LoadSymbols() that are made when
// debug information has to be read from a second file (the one named by the
// .gnu_debuglink section) in addition to the object file itself.
//
template <typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // |dbg_dirs| is held by reference, so it must outlive this object.
  explicit LoadSymbolsInfo(const vector<string>& dbg_dirs)
      : debug_dirs_(dbg_dirs), has_loading_addr_(false) {}

  // Note that |section| has been loaded.  The set of loaded sections is
  // tracked so that loading the same section from two different files can be
  // noticed; a repeat is reported on stderr and otherwise ignored.
  void LoadedSection(const string& section) {
    const bool newly_added = loaded_sections_.insert(section).second;
    if (!newly_added) {
      fprintf(stderr, "Section %s has already been loaded.\n", section.c_str());
    }
  }

  // Returns a copy of the path of the debug ELF file; empty if none was
  // recorded.
  string debuglink_file() const { return debuglink_file_; }

 private:
  // Directories in which to search for the debug ELF file.
  const vector<string>& debug_dirs_;

  // Full path to the debug ELF file.
  string debuglink_file_;

  // Initialised to false by the constructor; not read by any member of this
  // class.
  bool has_loading_addr_;

  // Names of the ELF sections loaded so far, across calls to LoadSymbols().
  set<string> loaded_sections_;
};
323 
// Return the preferred loading address of the binary: the p_vaddr of the
// first PT_LOAD entry among the NHEADER program headers starting at
// PROGRAM_HEADERS, or 0 if there is no PT_LOAD entry.
template <typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers, int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  // For non-PIC executables (e_type == ET_EXEC), the load address is
  // the start address of the first PT_LOAD segment.  (ELF requires
  // the segments to be sorted by load address.)  For PIC executables
  // and dynamic libraries (e_type == ET_DYN), this address will
  // normally be zero.
  const Phdr* current = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++current) {
    if (current->p_type == PT_LOAD) {
      return current->p_vaddr;
    }
  }
  return 0;
}
341 
342 template <typename ElfClass>
LoadSymbols(const string & obj_file,const bool big_endian,const typename ElfClass::Ehdr * elf_header,const bool read_gnu_debug_link,LoadSymbolsInfo<ElfClass> * info,SecMap * smap,void * rx_avma,size_t rx_size,UniqueStringUniverse * usu,void (* log)(const char *))343 bool LoadSymbols(const string& obj_file, const bool big_endian,
344                  const typename ElfClass::Ehdr* elf_header,
345                  const bool read_gnu_debug_link,
346                  LoadSymbolsInfo<ElfClass>* info, SecMap* smap, void* rx_avma,
347                  size_t rx_size, UniqueStringUniverse* usu,
348                  void (*log)(const char*)) {
349   typedef typename ElfClass::Phdr Phdr;
350   typedef typename ElfClass::Shdr Shdr;
351 
352   char buf[500];
353   SprintfLiteral(buf, "LoadSymbols: BEGIN   %s\n", obj_file.c_str());
354   buf[sizeof(buf) - 1] = 0;
355   log(buf);
356 
357   // This is how the text bias is calculated.
358   // BEGIN CALCULATE BIAS
359   uintptr_t loading_addr = GetLoadingAddress<ElfClass>(
360       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
361       elf_header->e_phnum);
362   uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr;
363   SprintfLiteral(buf, "LoadSymbols:   rx_avma=%llx, text_bias=%llx",
364                  (unsigned long long int)(uintptr_t)rx_avma,
365                  (unsigned long long int)text_bias);
366   buf[sizeof(buf) - 1] = 0;
367   log(buf);
368   // END CALCULATE BIAS
369 
370   const Shdr* sections =
371       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
372   const Shdr* section_names = sections + elf_header->e_shstrndx;
373   const char* names =
374       GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
375   const char* names_end = names + section_names->sh_size;
376   bool found_usable_info = false;
377 
378   // Dwarf Call Frame Information (CFI) is actually independent from
379   // the other DWARF debugging information, and can be used alone.
380   const Shdr* dwarf_cfi_section =
381       FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, sections,
382                                      names, names_end, elf_header->e_shnum);
383   if (dwarf_cfi_section) {
384     // Ignore the return value of this function; even without call frame
385     // information, the other debugging information could be perfectly
386     // useful.
387     info->LoadedSection(".debug_frame");
388     bool result = LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
389                                          dwarf_cfi_section, false, 0, 0,
390                                          big_endian, smap, text_bias, usu, log);
391     found_usable_info = found_usable_info || result;
392     if (result) log("LoadSymbols:   read CFI from .debug_frame");
393   }
394 
395   // Linux C++ exception handling information can also provide
396   // unwinding data.
397   const Shdr* eh_frame_section =
398       FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, sections, names,
399                                      names_end, elf_header->e_shnum);
400   if (eh_frame_section) {
401     // Pointers in .eh_frame data may be relative to the base addresses of
402     // certain sections. Provide those sections if present.
403     const Shdr* got_section = FindElfSectionByName<ElfClass>(
404         ".got", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum);
405     const Shdr* text_section = FindElfSectionByName<ElfClass>(
406         ".text", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum);
407     info->LoadedSection(".eh_frame");
408     // As above, ignore the return value of this function.
409     bool result = LoadDwarfCFI<ElfClass>(
410         obj_file, elf_header, ".eh_frame", eh_frame_section, true, got_section,
411         text_section, big_endian, smap, text_bias, usu, log);
412     found_usable_info = found_usable_info || result;
413     if (result) log("LoadSymbols:   read CFI from .eh_frame");
414   }
415 
416   SprintfLiteral(buf, "LoadSymbols: END     %s\n", obj_file.c_str());
417   buf[sizeof(buf) - 1] = 0;
418   log(buf);
419 
420   return found_usable_info;
421 }
422 
// Return the breakpad symbol file identifier for the architecture named by
// ELF_HEADER's e_machine field, or NULL if the architecture is not one of
// those listed below.
template <typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  // Known e_machine values and the identifier used for each.
  static const struct {
    int machine;
    const char* name;
  } kArchitectures[] = {
      {EM_386, "x86"},         {EM_ARM, "arm"},     {EM_AARCH64, "arm64"},
      {EM_MIPS, "mips"},       {EM_PPC64, "ppc64"}, {EM_PPC, "ppc"},
      {EM_S390, "s390"},       {EM_SPARC, "sparc"}, {EM_SPARCV9, "sparcv9"},
      {EM_X86_64, "x86_64"},
  };

  const Half arch = elf_header->e_machine;
  for (const auto& entry : kArchitectures) {
    if (entry.machine == arch) {
      return entry.name;
    }
  }
  return NULL;
}
454 
455 // Format the Elf file identifier in IDENTIFIER as a UUID with the
456 // dashes removed.
FormatIdentifier(unsigned char identifier[16])457 string FormatIdentifier(unsigned char identifier[16]) {
458   char identifier_str[40];
459   lul::FileID::ConvertIdentifierToString(identifier, identifier_str,
460                                          sizeof(identifier_str));
461   string id_no_dash;
462   for (int i = 0; identifier_str[i] != '\0'; ++i)
463     if (identifier_str[i] != '-') id_no_dash += identifier_str[i];
464   // Add an extra "0" by the end.  PDB files on Windows have an 'age'
465   // number appended to the end of the file identifier; this isn't
466   // really used or necessary on other platforms, but be consistent.
467   id_no_dash += '0';
468   return id_no_dash;
469 }
470 
// Return the non-directory portion of FILENAME, as computed by POSIX
// basename(): normally the part after the last slash, or the whole name if
// it contains no slash.  Trailing slashes are ignored ("/a/b/" gives "b"),
// and an empty FILENAME gives ".".
string BaseFileName(const string& filename) {
  // basename() may modify the buffer it is given, so it gets a private,
  // writable, NUL-terminated copy.  Holding that copy in a std::string
  // (rather than strdup()/free()) means it cannot leak and there is no
  // unchecked allocation result.
  string scratch(filename.c_str());
  // The result may point into |scratch| or at static storage; it is copied
  // into the returned string before |scratch| goes away.
  return string(basename(&scratch[0]));
}
480 
481 template <typename ElfClass>
ReadSymbolDataElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const vector<string> & debug_dirs,SecMap * smap,void * rx_avma,size_t rx_size,UniqueStringUniverse * usu,void (* log)(const char *))482 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
483                             const string& obj_filename,
484                             const vector<string>& debug_dirs, SecMap* smap,
485                             void* rx_avma, size_t rx_size,
486                             UniqueStringUniverse* usu,
487                             void (*log)(const char*)) {
488   typedef typename ElfClass::Ehdr Ehdr;
489 
490   unsigned char identifier[16];
491   if (!lul ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
492     fprintf(stderr, "%s: unable to generate file identifier\n",
493             obj_filename.c_str());
494     return false;
495   }
496 
497   const char* architecture = ElfArchitecture<ElfClass>(elf_header);
498   if (!architecture) {
499     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
500             obj_filename.c_str(), elf_header->e_machine);
501     return false;
502   }
503 
504   // Figure out what endianness this file is.
505   bool big_endian;
506   if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) return false;
507 
508   string name = BaseFileName(obj_filename);
509   string os = "Linux";
510   string id = FormatIdentifier(identifier);
511 
512   LoadSymbolsInfo<ElfClass> info(debug_dirs);
513   if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
514                              !debug_dirs.empty(), &info, smap, rx_avma, rx_size,
515                              usu, log)) {
516     const string debuglink_file = info.debuglink_file();
517     if (debuglink_file.empty()) return false;
518 
519     // Load debuglink ELF file.
520     fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
521     MmapWrapper debug_map_wrapper;
522     Ehdr* debug_elf_header = NULL;
523     if (!LoadELF(debuglink_file, &debug_map_wrapper,
524                  reinterpret_cast<void**>(&debug_elf_header)))
525       return false;
526     // Sanity checks to make sure everything matches up.
527     const char* debug_architecture =
528         ElfArchitecture<ElfClass>(debug_elf_header);
529     if (!debug_architecture) {
530       fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
531               debuglink_file.c_str(), debug_elf_header->e_machine);
532       return false;
533     }
534     if (strcmp(architecture, debug_architecture)) {
535       fprintf(stderr,
536               "%s with ELF machine architecture %s does not match "
537               "%s with ELF architecture %s\n",
538               debuglink_file.c_str(), debug_architecture, obj_filename.c_str(),
539               architecture);
540       return false;
541     }
542 
543     bool debug_big_endian;
544     if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
545       return false;
546     if (debug_big_endian != big_endian) {
547       fprintf(stderr, "%s and %s does not match in endianness\n",
548               obj_filename.c_str(), debuglink_file.c_str());
549       return false;
550     }
551 
552     if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
553                                debug_elf_header, false, &info, smap, rx_avma,
554                                rx_size, usu, log)) {
555       return false;
556     }
557   }
558 
559   return true;
560 }
561 
562 }  // namespace
563 
564 namespace lul {
565 
ReadSymbolDataInternal(const uint8_t * obj_file,const string & obj_filename,const vector<string> & debug_dirs,SecMap * smap,void * rx_avma,size_t rx_size,UniqueStringUniverse * usu,void (* log)(const char *))566 bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename,
567                             const vector<string>& debug_dirs, SecMap* smap,
568                             void* rx_avma, size_t rx_size,
569                             UniqueStringUniverse* usu,
570                             void (*log)(const char*)) {
571   if (!IsValidElf(obj_file)) {
572     fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
573     return false;
574   }
575 
576   int elfclass = ElfClass(obj_file);
577   if (elfclass == ELFCLASS32) {
578     return ReadSymbolDataElfClass<ElfClass32>(
579         reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs,
580         smap, rx_avma, rx_size, usu, log);
581   }
582   if (elfclass == ELFCLASS64) {
583     return ReadSymbolDataElfClass<ElfClass64>(
584         reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs,
585         smap, rx_avma, rx_size, usu, log);
586   }
587 
588   return false;
589 }
590 
ReadSymbolData(const string & obj_file,const vector<string> & debug_dirs,SecMap * smap,void * rx_avma,size_t rx_size,UniqueStringUniverse * usu,void (* log)(const char *))591 bool ReadSymbolData(const string& obj_file, const vector<string>& debug_dirs,
592                     SecMap* smap, void* rx_avma, size_t rx_size,
593                     UniqueStringUniverse* usu, void (*log)(const char*)) {
594   MmapWrapper map_wrapper;
595   void* elf_header = NULL;
596   if (!LoadELF(obj_file, &map_wrapper, &elf_header)) return false;
597 
598   return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
599                                 obj_file, debug_dirs, smap, rx_avma, rx_size,
600                                 usu, log);
601 }
602 
603 namespace {
604 
605 template <typename ElfClass>
FindElfClassSection(const char * elf_base,const char * section_name,typename ElfClass::Word section_type,const void ** section_start,int * section_size)606 void FindElfClassSection(const char* elf_base, const char* section_name,
607                          typename ElfClass::Word section_type,
608                          const void** section_start, int* section_size) {
609   typedef typename ElfClass::Ehdr Ehdr;
610   typedef typename ElfClass::Shdr Shdr;
611 
612   MOZ_ASSERT(elf_base);
613   MOZ_ASSERT(section_start);
614   MOZ_ASSERT(section_size);
615 
616   MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
617 
618   const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
619   MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
620 
621   const Shdr* sections =
622       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
623   const Shdr* section_names = sections + elf_header->e_shstrndx;
624   const char* names =
625       GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
626   const char* names_end = names + section_names->sh_size;
627 
628   const Shdr* section =
629       FindElfSectionByName<ElfClass>(section_name, section_type, sections,
630                                      names, names_end, elf_header->e_shnum);
631 
632   if (section != NULL && section->sh_size > 0) {
633     *section_start = elf_base + section->sh_offset;
634     *section_size = section->sh_size;
635   }
636 }
637 
638 template <typename ElfClass>
FindElfClassSegment(const char * elf_base,typename ElfClass::Word segment_type,const void ** segment_start,int * segment_size)639 void FindElfClassSegment(const char* elf_base,
640                          typename ElfClass::Word segment_type,
641                          const void** segment_start, int* segment_size) {
642   typedef typename ElfClass::Ehdr Ehdr;
643   typedef typename ElfClass::Phdr Phdr;
644 
645   MOZ_ASSERT(elf_base);
646   MOZ_ASSERT(segment_start);
647   MOZ_ASSERT(segment_size);
648 
649   MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
650 
651   const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
652   MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
653 
654   const Phdr* phdrs =
655       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
656 
657   for (int i = 0; i < elf_header->e_phnum; ++i) {
658     if (phdrs[i].p_type == segment_type) {
659       *segment_start = elf_base + phdrs[i].p_offset;
660       *segment_size = phdrs[i].p_filesz;
661       return;
662     }
663   }
664 }
665 
666 }  // namespace
667 
// Return true if the memory at ELF_BASE begins with the ELF magic number
// (ELFMAG).  A null ELF_BASE is reported as "not ELF" rather than being
// dereferenced.
bool IsValidElf(const void* elf_base) {
  if (elf_base == nullptr) {
    return false;
  }
  const char* magic = static_cast<const char*>(elf_base);
  return strncmp(magic, ELFMAG, SELFMAG) == 0;
}
671 
// Return the e_ident[EI_CLASS] byte of the ELF header at ELF_BASE.  The
// caller is expected to have checked that ELF_BASE points at an ELF image.
int ElfClass(const void* elf_base) {
  return static_cast<const ElfW(Ehdr)*>(elf_base)->e_ident[EI_CLASS];
}
677 
FindElfSection(const void * elf_mapped_base,const char * section_name,uint32_t section_type,const void ** section_start,int * section_size,int * elfclass)678 bool FindElfSection(const void* elf_mapped_base, const char* section_name,
679                     uint32_t section_type, const void** section_start,
680                     int* section_size, int* elfclass) {
681   MOZ_ASSERT(elf_mapped_base);
682   MOZ_ASSERT(section_start);
683   MOZ_ASSERT(section_size);
684 
685   *section_start = NULL;
686   *section_size = 0;
687 
688   if (!IsValidElf(elf_mapped_base)) return false;
689 
690   int cls = ElfClass(elf_mapped_base);
691   if (elfclass) {
692     *elfclass = cls;
693   }
694 
695   const char* elf_base = static_cast<const char*>(elf_mapped_base);
696 
697   if (cls == ELFCLASS32) {
698     FindElfClassSection<ElfClass32>(elf_base, section_name, section_type,
699                                     section_start, section_size);
700     return *section_start != NULL;
701   } else if (cls == ELFCLASS64) {
702     FindElfClassSection<ElfClass64>(elf_base, section_name, section_type,
703                                     section_start, section_size);
704     return *section_start != NULL;
705   }
706 
707   return false;
708 }
709 
FindElfSegment(const void * elf_mapped_base,uint32_t segment_type,const void ** segment_start,int * segment_size,int * elfclass)710 bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
711                     const void** segment_start, int* segment_size,
712                     int* elfclass) {
713   MOZ_ASSERT(elf_mapped_base);
714   MOZ_ASSERT(segment_start);
715   MOZ_ASSERT(segment_size);
716 
717   *segment_start = NULL;
718   *segment_size = 0;
719 
720   if (!IsValidElf(elf_mapped_base)) return false;
721 
722   int cls = ElfClass(elf_mapped_base);
723   if (elfclass) {
724     *elfclass = cls;
725   }
726 
727   const char* elf_base = static_cast<const char*>(elf_mapped_base);
728 
729   if (cls == ELFCLASS32) {
730     FindElfClassSegment<ElfClass32>(elf_base, segment_type, segment_start,
731                                     segment_size);
732     return *segment_start != NULL;
733   } else if (cls == ELFCLASS64) {
734     FindElfClassSegment<ElfClass64>(elf_base, segment_type, segment_start,
735                                     segment_size);
736     return *segment_start != NULL;
737   }
738 
739   return false;
740 }
741 
742 // (derived from)
743 // file_id.cc: Return a unique identifier for a file
744 //
745 // See file_id.h for documentation
746 //
747 
// ELF note name and desc are padded to 32-bit words: round |a| up to the
// next multiple of 4.  The argument is parenthesized so that an expression
// such as NOTE_PADDING(x | y) expands as intended.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
750 
751 // These functions are also used inside the crashed process, so be safe
752 // and use the syscall/libc wrappers instead of direct syscalls or libc.
753 
754 template <typename ElfClass>
ElfClassBuildIDNoteIdentifier(const void * section,int length,uint8_t identifier[kMDGUIDSize])755 static bool ElfClassBuildIDNoteIdentifier(const void* section, int length,
756                                           uint8_t identifier[kMDGUIDSize]) {
757   typedef typename ElfClass::Nhdr Nhdr;
758 
759   const void* section_end = reinterpret_cast<const char*>(section) + length;
760   const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
761   while (reinterpret_cast<const void*>(note_header) < section_end) {
762     if (note_header->n_type == NT_GNU_BUILD_ID) break;
763     note_header = reinterpret_cast<const Nhdr*>(
764         reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
765         NOTE_PADDING(note_header->n_namesz) +
766         NOTE_PADDING(note_header->n_descsz));
767   }
768   if (reinterpret_cast<const void*>(note_header) >= section_end ||
769       note_header->n_descsz == 0) {
770     return false;
771   }
772 
773   const char* build_id = reinterpret_cast<const char*>(note_header) +
774                          sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
775   // Copy as many bits of the build ID as will fit
776   // into the GUID space.
777   memset(identifier, 0, kMDGUIDSize);
778   memcpy(identifier, build_id,
779          std::min(kMDGUIDSize, (size_t)note_header->n_descsz));
780 
781   return true;
782 }
783 
784 // Attempt to locate a .note.gnu.build-id section in an ELF binary
785 // and copy as many bytes of it as will fit into |identifier|.
FindElfBuildIDNote(const void * elf_mapped_base,uint8_t identifier[kMDGUIDSize])786 static bool FindElfBuildIDNote(const void* elf_mapped_base,
787                                uint8_t identifier[kMDGUIDSize]) {
788   void* note_section;
789   int note_size, elfclass;
790   if ((!FindElfSegment(elf_mapped_base, PT_NOTE, (const void**)&note_section,
791                        &note_size, &elfclass) ||
792        note_size == 0) &&
793       (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
794                        (const void**)&note_section, &note_size, &elfclass) ||
795        note_size == 0)) {
796     return false;
797   }
798 
799   if (elfclass == ELFCLASS32) {
800     return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size,
801                                                      identifier);
802   } else if (elfclass == ELFCLASS64) {
803     return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size,
804                                                      identifier);
805   }
806 
807   return false;
808 }
809 
810 // Attempt to locate the .text section of an ELF binary and generate
811 // a simple hash by XORing the first page worth of bytes into |identifier|.
HashElfTextSection(const void * elf_mapped_base,uint8_t identifier[kMDGUIDSize])812 static bool HashElfTextSection(const void* elf_mapped_base,
813                                uint8_t identifier[kMDGUIDSize]) {
814   void* text_section;
815   int text_size;
816   if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
817                       (const void**)&text_section, &text_size, NULL) ||
818       text_size == 0) {
819     return false;
820   }
821 
822   memset(identifier, 0, kMDGUIDSize);
823   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
824   const uint8_t* ptr_end = ptr + std::min(text_size, 4096);
825   while (ptr < ptr_end) {
826     for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i];
827     ptr += kMDGUIDSize;
828   }
829   return true;
830 }
831 
832 // static
ElfFileIdentifierFromMappedFile(const void * base,uint8_t identifier[kMDGUIDSize])833 bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
834                                              uint8_t identifier[kMDGUIDSize]) {
835   // Look for a build id note first.
836   if (FindElfBuildIDNote(base, identifier)) return true;
837 
838   // Fall back on hashing the first page of the text section.
839   return HashElfTextSection(base, identifier);
840 }
841 
842 // static
ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],char * buffer,int buffer_length)843 void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
844                                        char* buffer, int buffer_length) {
845   uint8_t identifier_swapped[kMDGUIDSize];
846 
847   // Endian-ness swap to match dump processor expectation.
848   memcpy(identifier_swapped, identifier, kMDGUIDSize);
849   uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
850   *data1 = htonl(*data1);
851   uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
852   *data2 = htons(*data2);
853   uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
854   *data3 = htons(*data3);
855 
856   int buffer_idx = 0;
857   for (unsigned int idx = 0;
858        (buffer_idx < buffer_length) && (idx < kMDGUIDSize); ++idx) {
859     int hi = (identifier_swapped[idx] >> 4) & 0x0F;
860     int lo = (identifier_swapped[idx]) & 0x0F;
861 
862     if (idx == 4 || idx == 6 || idx == 8 || idx == 10)
863       buffer[buffer_idx++] = '-';
864 
865     buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi;
866     buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo;
867   }
868 
869   // NULL terminate
870   buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0;
871 }
872 
873 }  // namespace lul
874