1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include <assert.h>
31 #include <cxxabi.h>
32 #include <elf.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <link.h>
36 #include <string.h>
37 #include <sys/mman.h>
38 #include <sys/stat.h>
39 #include <sys/types.h>
40 #include <unistd.h>
41 
42 #include <algorithm>
43 #include <cstdarg>
44 #include <cstdio>
45 #include <cstdlib>
46 #include <cstring>
47 #include <functional>
48 #include <list>
49 #include <map>
50 #include <string>
51 #include <vector>
52 
53 #include "common/linux/dump_dwarf.h"
54 #include "common/linux/dump_symbols.h"
55 #include "common/linux/file_id.h"
56 #include "common/linux/guid_creator.h"
57 #include "common/linux/module.h"
58 #include "common/linux/stabs_reader.h"
59 #include "processor/scoped_ptr.h"
60 
61 // This namespace contains helper functions.
62 namespace {
63 
64 using google_breakpad::Module;
65 using std::vector;
66 
// Return the demangled form of MANGLED, obtained from the C++ ABI's
// abi::__cxa_demangle.  If demangling does not succeed, return MANGLED
// unchanged.  Older versions of GCC may not support this ABI call.
static std::string Demangle(const std::string &mangled) {
  int demangle_status = 0;
  char *buffer =
      abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &demangle_status);
  if (demangle_status != 0 || buffer == NULL)
    return std::string(mangled);

  // Copy the result into a string, then release the buffer that
  // __cxa_demangle handed back to us.
  std::string result(buffer);
  free(buffer);
  return result;
}
79 
// Convert the file offsets stored in the ELF image mapped at OBJ_BASE
// into addresses, by adding the mapping's base address to the ELF
// header's e_phoff and e_shoff fields and to each section header's
// sh_offset.  Afterwards those fields can be cast directly to pointers
// into the mapped image, which makes it easy to find things by offset.
//
// This patches the image in place, so OBJ_BASE must be writable.  The
// offsets found in the image are not validated.
static void FixAddress(void *obj_base) {
  // The base must be held in an address-sized integer.  ElfW(Word) is
  // only 32 bits wide, even for 64-bit ELF, so it cannot hold a 64-bit
  // pointer; ElfW(Addr) matches the native pointer width.
  const ElfW(Addr) base = reinterpret_cast<ElfW(Addr)>(obj_base);
  ElfW(Ehdr) *elf_header = static_cast<ElfW(Ehdr) *>(obj_base);
  elf_header->e_phoff += base;
  elf_header->e_shoff += base;
  // e_shoff is an address now, so it leads us to the section headers.
  ElfW(Shdr) *sections = reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
  for (int i = 0; i < elf_header->e_shnum; ++i)
    sections[i].sh_offset += base;
}
91 
// Return the preferred loading address of the binary whose NHEADER
// program headers start at PROGRAM_HEADERS: the virtual address of the
// first PT_LOAD segment whose file offset is zero.  Return 0 if no
// segment qualifies.
static ElfW(Addr) GetLoadingAddress(const ElfW(Phdr) *program_headers,
                                    int nheader) {
  int index = 0;
  while (index < nheader) {
    const ElfW(Phdr) *segment = program_headers + index;
    // For an executable, the segment we want is the loadable one that
    // maps the very start of the file.
    const bool loads_file_start =
        segment->p_type == PT_LOAD && segment->p_offset == 0;
    if (loads_file_start)
      return segment->p_vaddr;
    ++index;
  }
  // Other kinds of ELF file have no such segment; report 0 for them.
  return 0;
}
105 
// Return true if the header at ELF_HEADER starts with the ELF magic
// number, false otherwise.  Only the four magic bytes at the start of
// e_ident are examined.
static bool IsValidElf(const ElfW(Ehdr) *elf_header) {
  const unsigned char *ident = elf_header->e_ident;
  return ident[EI_MAG0] == ELFMAG0 && ident[EI_MAG1] == ELFMAG1 &&
         ident[EI_MAG2] == ELFMAG2 && ident[EI_MAG3] == ELFMAG3;
}
109 
// Return the first of the NSECTION section headers at SECTIONS whose
// name is exactly NAME.  Return NULL if there is no such section, or if
// NAME is the empty string.
//
// STRTAB is the header of the section-name string table.  Its sh_offset
// must already hold the table's address in memory rather than a file
// offset (FixAddress performs that conversion), since it is cast
// directly to a pointer here.
static const ElfW(Shdr) *FindSectionByName(const char *name,
                                           const ElfW(Shdr) *sections,
                                           const ElfW(Shdr) *strtab,
                                           int nsection) {
  assert(name != NULL);
  assert(sections != NULL);
  assert(strtab != NULL);
  assert(nsection > 0);

  if (name[0] == '\0')
    return NULL;

  const char *section_names =
      reinterpret_cast<const char *>(strtab->sh_offset);
  for (int i = 0; i < nsection; ++i) {
    // Compare the whole name.  Comparing only the first strlen(name)
    // characters would let ".stab" match ".stabstr", for example.
    if (strcmp(name, section_names + sections[i].sh_name) == 0)
      return sections + i;
  }
  return NULL;
}
130 
131 // Our handler class for STABS data.
132 class DumpStabsHandler: public google_breakpad::StabsHandler {
133  public:
DumpStabsHandler(Module * module)134   DumpStabsHandler(Module *module) :
135       module_(module),
136       comp_unit_base_address_(0),
137       current_function_(NULL),
138       current_source_file_(NULL),
139       current_source_file_name_(NULL) { }
140 
141   bool StartCompilationUnit(const char *name, uint64_t address,
142                             const char *build_directory);
143   bool EndCompilationUnit(uint64_t address);
144   bool StartFunction(const std::string &name, uint64_t address);
145   bool EndFunction(uint64_t address);
146   bool Line(uint64_t address, const char *name, int number);
147   void Warning(const char *format, ...);
148 
149   // Do any final processing necessary to make module_ contain all the
150   // data provided by the STABS reader.
151   //
152   // Because STABS does not provide reliable size information for
153   // functions and lines, we need to make a pass over the data after
154   // processing all the STABS to compute those sizes.  We take care of
155   // that here.
156   void Finalize();
157 
158  private:
159 
160   // An arbitrary, but very large, size to use for functions whose
161   // size we can't compute properly.
162   static const uint64_t kFallbackSize = 0x10000000;
163 
164   // The module we're contributing debugging info to.
165   Module *module_;
166 
167   // The functions we've generated so far.  We don't add these to
168   // module_ as we parse them.  Instead, we wait until we've computed
169   // their ending address, and their lines' ending addresses.
170   //
171   // We could just stick them in module_ from the outset, but if
172   // module_ already contains data gathered from other debugging
173   // formats, that would complicate the size computation.
174   vector<Module::Function *> functions_;
175 
176   // Boundary addresses.  STABS doesn't necessarily supply sizes for
177   // functions and lines, so we need to compute them ourselves by
178   // finding the next object.
179   vector<Module::Address> boundaries_;
180 
181   // The base address of the current compilation unit.  We use this to
182   // recognize functions we should omit from the symbol file.  (If you
183   // know the details of why we omit these, please patch this
184   // comment.)
185   Module::Address comp_unit_base_address_;
186 
187   // The function we're currently contributing lines to.
188   Module::Function *current_function_;
189 
190   // The last Module::File we got a line number in.
191   Module::File *current_source_file_;
192 
193   // The pointer in the .stabstr section of the name that
194   // current_source_file_ is built from.  This allows us to quickly
195   // recognize when the current line is in the same file as the
196   // previous one (which it usually is).
197   const char *current_source_file_name_;
198 };
199 
StartCompilationUnit(const char * name,uint64_t address,const char * build_directory)200 bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address,
201                                             const char *build_directory) {
202   assert(! comp_unit_base_address_);
203   current_source_file_name_ = name;
204   current_source_file_ = module_->FindFile(name);
205   comp_unit_base_address_ = address;
206   boundaries_.push_back(static_cast<Module::Address>(address));
207   return true;
208 }
209 
EndCompilationUnit(uint64_t address)210 bool DumpStabsHandler::EndCompilationUnit(uint64_t address) {
211   assert(comp_unit_base_address_);
212   comp_unit_base_address_ = 0;
213   current_source_file_ = NULL;
214   current_source_file_name_ = NULL;
215   if (address)
216     boundaries_.push_back(static_cast<Module::Address>(address));
217   return true;
218 }
219 
StartFunction(const std::string & name,uint64_t address)220 bool DumpStabsHandler::StartFunction(const std::string &name,
221                                      uint64_t address) {
222   assert(! current_function_);
223   Module::Function *f = new Module::Function;
224   f->name_ = Demangle(name);
225   f->address_ = address;
226   f->size_ = 0;           // We compute this in DumpStabsHandler::Finalize().
227   f->parameter_size_ = 0; // We don't provide this information.
228   current_function_ = f;
229   boundaries_.push_back(static_cast<Module::Address>(address));
230   return true;
231 }
232 
EndFunction(uint64_t address)233 bool DumpStabsHandler::EndFunction(uint64_t address) {
234   assert(current_function_);
235   // Functions in this compilation unit should have address bigger
236   // than the compilation unit's starting address.  There may be a lot
237   // of duplicated entries for functions in the STABS data; only one
238   // entry can meet this requirement.
239   //
240   // (I don't really understand the above comment; just bringing it
241   // along from the previous code, and leaving the behaivor unchanged.
242   // If you know the whole story, please patch this comment.  --jimb)
243   if (current_function_->address_ >= comp_unit_base_address_)
244     functions_.push_back(current_function_);
245   else
246     delete current_function_;
247   current_function_ = NULL;
248   if (address)
249     boundaries_.push_back(static_cast<Module::Address>(address));
250   return true;
251 }
252 
Line(uint64_t address,const char * name,int number)253 bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) {
254   assert(current_function_);
255   assert(current_source_file_);
256   if (name != current_source_file_name_) {
257     current_source_file_ = module_->FindFile(name);
258     current_source_file_name_ = name;
259   }
260   Module::Line line;
261   line.address_ = address;
262   line.size_ = 0;  // We compute this in DumpStabsHandler::Finalize().
263   line.file_ = current_source_file_;
264   line.number_ = number;
265   current_function_->lines_.push_back(line);
266   return true;
267 }
268 
Warning(const char * format,...)269 void DumpStabsHandler::Warning(const char *format, ...) {
270   va_list args;
271   va_start(args, format);
272   vfprintf(stderr, format, args);
273   va_end(args);
274 }
275 
Finalize()276 void DumpStabsHandler::Finalize() {
277   // Sort our boundary list, so we can search it quickly.
278   sort(boundaries_.begin(), boundaries_.end());
279   // Sort all functions by address, just for neatness.
280   sort(functions_.begin(), functions_.end(),
281        Module::Function::CompareByAddress);
282   for (vector<Module::Function *>::iterator func_it = functions_.begin();
283        func_it != functions_.end();
284        func_it++) {
285     Module::Function *f = *func_it;
286     // Compute the function f's size.
287     vector<Module::Address>::iterator boundary
288         = std::upper_bound(boundaries_.begin(), boundaries_.end(), f->address_);
289     if (boundary != boundaries_.end())
290       f->size_ = *boundary - f->address_;
291     else
292       // If this is the last function in the module, and the STABS
293       // reader was unable to give us its ending address, then assign
294       // it a bogus, very large value.  This will happen at most once
295       // per module: since we've added all functions' addresses to the
296       // boundary table, only one can be the last.
297       f->size_ = kFallbackSize;
298 
299     // Compute sizes for each of the function f's lines --- if it has any.
300     if (! f->lines_.empty()) {
301       stable_sort(f->lines_.begin(), f->lines_.end(),
302                   Module::Line::CompareByAddress);
303       vector<Module::Line>::iterator last_line = f->lines_.end() - 1;
304       for (vector<Module::Line>::iterator line_it = f->lines_.begin();
305            line_it != last_line; line_it++)
306         line_it[0].size_ = line_it[1].address_ - line_it[0].address_;
307       // Compute the size of the last line from f's end address.
308       last_line->size_ = (f->address_ + f->size_) - last_line->address_;
309     }
310   }
311   // Now that everything has a size, add our functions to the module, and
312   // dispose of our private list.
313   module_->AddFunctions(functions_.begin(), functions_.end());
314   functions_.clear();
315 }
316 
LoadStabs(const ElfW (Shdr)* stab_section,const ElfW (Shdr)* stabstr_section,Module * module)317 static bool LoadStabs(const ElfW(Shdr) *stab_section,
318                       const ElfW(Shdr) *stabstr_section,
319                       Module *module) {
320   // A callback object to handle data from the STABS reader.
321   DumpStabsHandler handler(module);
322   // Find the addresses of the STABS data, and create a STABS reader object.
323   uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset);
324   uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset);
325   google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
326                                       stabstr, stabstr_section->sh_size,
327                                       &handler);
328   // Read the STABS data, and do post-processing.
329   if (! reader.Process())
330     return false;
331   handler.Finalize();
332   return true;
333 }
334 
LoadSymbols(const std::string & obj_file,ElfW (Ehdr)* elf_header,Module * module)335 static bool LoadSymbols(const std::string &obj_file, ElfW(Ehdr) *elf_header,
336                         Module *module) {
337   // Translate all offsets in section headers into address.
338   FixAddress(elf_header);
339   ElfW(Addr) loading_addr = GetLoadingAddress(
340       reinterpret_cast<ElfW(Phdr) *>(elf_header->e_phoff),
341       elf_header->e_phnum);
342   module->SetLoadAddress(loading_addr);
343 
344   const ElfW(Shdr) *sections =
345       reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
346   const ElfW(Shdr) *strtab = sections + elf_header->e_shstrndx;
347   bool found_some_debug_info = false;
348   const ElfW(Shdr) *stab_section
349       = FindSectionByName(".stab", sections, strtab, elf_header->e_shnum);
350   if (stab_section) {
351     const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections;
352     if (stabstr_section) {
353       found_some_debug_info = true;
354       if (! LoadStabs(stab_section, stabstr_section, module))
355         fprintf(stderr, "\".stab\" section found, but failed to load STABS"
356                 " debugging information\n");
357     }
358   }
359   const ElfW(Shdr) *dwarf_section
360       = FindSectionByName(".debug_info", sections, strtab, elf_header->e_shnum);
361   if (dwarf_section) {
362     found_some_debug_info = true;
363     if (! LoadDwarf(obj_file, elf_header, module))
364       fprintf(stderr, "\".debug_info\" section found, but failed to load "
365               "DWARF debugging information\n");
366   }
367   if (! found_some_debug_info) {
368     fprintf(stderr, "file %s contains no debugging information (no \".stab\" or \".debug_info\" sections)\n", obj_file.c_str());
369     return false;
370   }
371   return true;
372 }
373 
//
// FDWrapper
//
// Wrapper class to make sure opened file is closed.
//
// The wrapper closes the file descriptor it holds when it is
// destroyed, unless release() has been called first.  A descriptor of
// -1 means "nothing to close".
//
class FDWrapper {
 public:
  // Take responsibility for closing FD.
  explicit FDWrapper(int fd) :
    fd_(fd) {
    }
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }
  // Return the wrapped file descriptor, or -1 if it has been released.
  int get() const {
    return fd_;
  }
  // Give up responsibility for the file descriptor and return it.  The
  // destructor will no longer close it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }
 private:
  // Copying would make two wrappers close the same descriptor, so
  // these are declared but not defined.
  FDWrapper(const FDWrapper &);
  void operator=(const FDWrapper &);

  int fd_;
};
399 
//
// MmapWrapper
//
// Wrapper class to make sure mapped regions are unmapped.
//
// The wrapper unmaps its region when it is destroyed, unless release()
// has been called first.  A NULL address means "nothing to unmap".
//
class MmapWrapper {
 public:
  // Take responsibility for unmapping the MAPPED_SIZE bytes mapped at
  // MAPPED_ADDRESS.
  MmapWrapper(void *mapped_address, size_t mapped_size) :
    base_(mapped_address), size_(mapped_size) {
  }
  ~MmapWrapper() {
    if (base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }
  // Give up responsibility for the region.  The destructor will no
  // longer unmap it.
  void release() {
    base_ = NULL;
    size_ = 0;
  }

 private:
  // Copying would make two wrappers unmap the same region, so these
  // are declared but not defined.
  MmapWrapper(const MmapWrapper &);
  void operator=(const MmapWrapper &);

  void *base_;
  size_t size_;
};
425 
// Return the breakpad symbol file identifier for the architecture of
// ELF_HEADER, as given by its e_machine field: "x86" for EM_386 and
// "x86_64" for EM_X86_64.  Return NULL for any other architecture.
const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) {
  switch (elf_header->e_machine) {
    case EM_386:
      return "x86";
    case EM_X86_64:
      return "x86_64";
    default:
      return NULL;
  }
}
437 
438 // Format the Elf file identifier in IDENTIFIER as a UUID with the
439 // dashes removed.
FormatIdentifier(unsigned char identifier[16])440 std::string FormatIdentifier(unsigned char identifier[16]) {
441   char identifier_str[40];
442   google_breakpad::FileID::ConvertIdentifierToString(
443       identifier,
444       identifier_str,
445       sizeof(identifier_str));
446   std::string id_no_dash;
447   for (int i = 0; identifier_str[i] != '\0'; ++i)
448     if (identifier_str[i] != '-')
449       id_no_dash += identifier_str[i];
450   // Add an extra "0" by the end.  PDB files on Windows have an 'age'
451   // number appended to the end of the file identifier; this isn't
452   // really used or necessary on other platforms, but let's preserve
453   // the pattern.
454   id_no_dash += '0';
455   return id_no_dash;
456 }
457 
// Return the non-directory portion of FILENAME: the portion after the
// last slash, or the whole filename if there are no slashes.  If
// FILENAME ends with a slash, the result is the empty string.
std::string BaseFileName(const std::string &filename) {
  // Search the string directly rather than calling basename: the two
  // common variants of basename disagree about trailing slashes and
  // about whether the argument may be modified, and this way we need no
  // temporary copy of the name.
  const std::string::size_type last_slash = filename.rfind('/');
  if (last_slash == std::string::npos)
    return filename;
  return filename.substr(last_slash + 1);
}
467 
468 }  // namespace
469 
470 namespace google_breakpad {
471 
WriteSymbolFile(const std::string & obj_file,FILE * sym_file)472 bool DumpSymbols::WriteSymbolFile(const std::string &obj_file,
473                                   FILE *sym_file) {
474   int obj_fd = open(obj_file.c_str(), O_RDONLY);
475   if (obj_fd < 0)
476     return false;
477   FDWrapper obj_fd_wrapper(obj_fd);
478   struct stat st;
479   if (fstat(obj_fd, &st) != 0 && st.st_size <= 0)
480     return false;
481   void *obj_base = mmap(NULL, st.st_size,
482                         PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
483   if (obj_base == MAP_FAILED)
484     return false;
485   MmapWrapper map_wrapper(obj_base, st.st_size);
486   ElfW(Ehdr) *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_base);
487   if (!IsValidElf(elf_header))
488     return false;
489 
490   unsigned char identifier[16];
491   google_breakpad::FileID file_id(obj_file.c_str());
492   if (! file_id.ElfFileIdentifier(identifier))
493     return false;
494 
495   const char *architecture = ElfArchitecture(elf_header);
496   if (! architecture)
497     return false;
498 
499   std::string name = BaseFileName(obj_file);
500   std::string os = "Linux";
501   std::string id = FormatIdentifier(identifier);
502 
503   Module module(name, os, architecture, id);
504   if (!LoadSymbols(obj_file, elf_header, &module))
505     return false;
506   if (!module.Write(sym_file))
507     return false;
508 
509   return true;
510 }
511 
512 }  // namespace google_breakpad
513