1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include <assert.h>
31 #include <cxxabi.h>
32 #include <elf.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <link.h>
36 #include <string.h>
37 #include <sys/mman.h>
38 #include <sys/stat.h>
39 #include <sys/types.h>
40 #include <unistd.h>
41
42 #include <algorithm>
43 #include <cstdarg>
44 #include <cstdio>
45 #include <cstdlib>
46 #include <cstring>
47 #include <functional>
48 #include <list>
49 #include <map>
50 #include <string>
51 #include <vector>
52
53 #include "common/linux/dump_dwarf.h"
54 #include "common/linux/dump_symbols.h"
55 #include "common/linux/file_id.h"
56 #include "common/linux/guid_creator.h"
57 #include "common/linux/module.h"
58 #include "common/linux/stabs_reader.h"
59 #include "processor/scoped_ptr.h"
60
61 // This namespace contains helper functions.
62 namespace {
63
64 using google_breakpad::Module;
65 using std::vector;
66
// Return the demangled form of MANGLED, obtained from the C++ ABI call
// abi::__cxa_demangle. If the call does not report success, return
// MANGLED itself, unchanged.
// Older GCC may not support the abi call.
static std::string Demangle(const std::string &mangled) {
  int status = 0;
  char *buffer = abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &status);
  if (status != 0 || buffer == NULL)
    return std::string(mangled);
  // Copy the result into a std::string before releasing the C buffer.
  std::string result(buffer);
  free(buffer);
  return result;
}
79
// Turn the file offsets stored in the ELF image mapped at OBJ_BASE into
// in-memory addresses, by adding the mapped base address to the ELF
// header's e_phoff and e_shoff, and to the sh_offset of every section
// header. This makes life easier when we want to find something by
// offset. The image is modified in place, so it must be writable.
static void FixAddress(void *obj_base) {
  // The base must be held in an address-sized integer: ElfW(Word) is only
  // 32 bits wide, even for 64-bit ELF, and cannot hold a 64-bit pointer.
  const ElfW(Addr) base = reinterpret_cast<ElfW(Addr)>(obj_base);
  ElfW(Ehdr) *elf_header = static_cast<ElfW(Ehdr) *>(obj_base);
  elf_header->e_phoff += base;
  elf_header->e_shoff += base;
  // e_shoff now holds the address of the section header table itself.
  ElfW(Shdr) *sections = reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
  for (int i = 0; i < elf_header->e_shnum; ++i)
    sections[i].sh_offset += base;
}
91
// Find the preferred loading address of the binary whose NHEADER program
// headers start at PROGRAM_HEADERS.
static ElfW(Addr) GetLoadingAddress(const ElfW(Phdr) *program_headers,
                                    int nheader) {
  int index = 0;
  while (index < nheader) {
    const ElfW(Phdr) *candidate = program_headers + index;
    // For an executable, the answer is the virtual address of the first
    // PT_LOAD segment that starts at file offset zero.
    const bool loads_file_start =
        candidate->p_type == PT_LOAD && candidate->p_offset == 0;
    if (loads_file_start)
      return candidate->p_vaddr;
    ++index;
  }
  // No such segment: for other types of ELF, return 0.
  return 0;
}
105
// Return true if the first SELFMAG bytes at ELF_HEADER are the ELF magic
// number ELFMAG.
static bool IsValidElf(const ElfW(Ehdr) *elf_header) {
  const int mismatch = memcmp(elf_header, ELFMAG, SELFMAG);
  return mismatch == 0;
}
109
// Return the first of the NSECTION section headers at SECTIONS whose name
// is exactly NAME, or NULL if there is no such section or NAME is empty.
// STRTAB is the header of the section-name string table. Its sh_offset is
// used directly as a memory address here, so it must already have been
// converted from a file offset (see FixAddress).
static const ElfW(Shdr) *FindSectionByName(const char *name,
                                           const ElfW(Shdr) *sections,
                                           const ElfW(Shdr) *strtab,
                                           int nsection) {
  assert(name != NULL);
  assert(sections != NULL);
  assert(strtab != NULL);
  assert(nsection > 0);

  if (name[0] == '\0')
    return NULL;

  for (int i = 0; i < nsection; ++i) {
    const char *section_name =
        reinterpret_cast<char*>(strtab->sh_offset + sections[i].sh_name);
    // Compare the whole name. A prefix comparison would let ".stab" match
    // ".stabstr", or any other section whose name merely begins with NAME.
    if (strcmp(name, section_name) == 0)
      return sections + i;
  }
  return NULL;
}
130
131 // Our handler class for STABS data.
132 class DumpStabsHandler: public google_breakpad::StabsHandler {
133 public:
DumpStabsHandler(Module * module)134 DumpStabsHandler(Module *module) :
135 module_(module),
136 comp_unit_base_address_(0),
137 current_function_(NULL),
138 current_source_file_(NULL),
139 current_source_file_name_(NULL) { }
140
141 bool StartCompilationUnit(const char *name, uint64_t address,
142 const char *build_directory);
143 bool EndCompilationUnit(uint64_t address);
144 bool StartFunction(const std::string &name, uint64_t address);
145 bool EndFunction(uint64_t address);
146 bool Line(uint64_t address, const char *name, int number);
147 void Warning(const char *format, ...);
148
149 // Do any final processing necessary to make module_ contain all the
150 // data provided by the STABS reader.
151 //
152 // Because STABS does not provide reliable size information for
153 // functions and lines, we need to make a pass over the data after
154 // processing all the STABS to compute those sizes. We take care of
155 // that here.
156 void Finalize();
157
158 private:
159
160 // An arbitrary, but very large, size to use for functions whose
161 // size we can't compute properly.
162 static const uint64_t kFallbackSize = 0x10000000;
163
164 // The module we're contributing debugging info to.
165 Module *module_;
166
167 // The functions we've generated so far. We don't add these to
168 // module_ as we parse them. Instead, we wait until we've computed
169 // their ending address, and their lines' ending addresses.
170 //
171 // We could just stick them in module_ from the outset, but if
172 // module_ already contains data gathered from other debugging
173 // formats, that would complicate the size computation.
174 vector<Module::Function *> functions_;
175
176 // Boundary addresses. STABS doesn't necessarily supply sizes for
177 // functions and lines, so we need to compute them ourselves by
178 // finding the next object.
179 vector<Module::Address> boundaries_;
180
181 // The base address of the current compilation unit. We use this to
182 // recognize functions we should omit from the symbol file. (If you
183 // know the details of why we omit these, please patch this
184 // comment.)
185 Module::Address comp_unit_base_address_;
186
187 // The function we're currently contributing lines to.
188 Module::Function *current_function_;
189
190 // The last Module::File we got a line number in.
191 Module::File *current_source_file_;
192
193 // The pointer in the .stabstr section of the name that
194 // current_source_file_ is built from. This allows us to quickly
195 // recognize when the current line is in the same file as the
196 // previous one (which it usually is).
197 const char *current_source_file_name_;
198 };
199
StartCompilationUnit(const char * name,uint64_t address,const char * build_directory)200 bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address,
201 const char *build_directory) {
202 assert(! comp_unit_base_address_);
203 current_source_file_name_ = name;
204 current_source_file_ = module_->FindFile(name);
205 comp_unit_base_address_ = address;
206 boundaries_.push_back(static_cast<Module::Address>(address));
207 return true;
208 }
209
EndCompilationUnit(uint64_t address)210 bool DumpStabsHandler::EndCompilationUnit(uint64_t address) {
211 assert(comp_unit_base_address_);
212 comp_unit_base_address_ = 0;
213 current_source_file_ = NULL;
214 current_source_file_name_ = NULL;
215 if (address)
216 boundaries_.push_back(static_cast<Module::Address>(address));
217 return true;
218 }
219
StartFunction(const std::string & name,uint64_t address)220 bool DumpStabsHandler::StartFunction(const std::string &name,
221 uint64_t address) {
222 assert(! current_function_);
223 Module::Function *f = new Module::Function;
224 f->name_ = Demangle(name);
225 f->address_ = address;
226 f->size_ = 0; // We compute this in DumpStabsHandler::Finalize().
227 f->parameter_size_ = 0; // We don't provide this information.
228 current_function_ = f;
229 boundaries_.push_back(static_cast<Module::Address>(address));
230 return true;
231 }
232
EndFunction(uint64_t address)233 bool DumpStabsHandler::EndFunction(uint64_t address) {
234 assert(current_function_);
235 // Functions in this compilation unit should have address bigger
236 // than the compilation unit's starting address. There may be a lot
237 // of duplicated entries for functions in the STABS data; only one
238 // entry can meet this requirement.
239 //
240 // (I don't really understand the above comment; just bringing it
241 // along from the previous code, and leaving the behaivor unchanged.
242 // If you know the whole story, please patch this comment. --jimb)
243 if (current_function_->address_ >= comp_unit_base_address_)
244 functions_.push_back(current_function_);
245 else
246 delete current_function_;
247 current_function_ = NULL;
248 if (address)
249 boundaries_.push_back(static_cast<Module::Address>(address));
250 return true;
251 }
252
Line(uint64_t address,const char * name,int number)253 bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) {
254 assert(current_function_);
255 assert(current_source_file_);
256 if (name != current_source_file_name_) {
257 current_source_file_ = module_->FindFile(name);
258 current_source_file_name_ = name;
259 }
260 Module::Line line;
261 line.address_ = address;
262 line.size_ = 0; // We compute this in DumpStabsHandler::Finalize().
263 line.file_ = current_source_file_;
264 line.number_ = number;
265 current_function_->lines_.push_back(line);
266 return true;
267 }
268
Warning(const char * format,...)269 void DumpStabsHandler::Warning(const char *format, ...) {
270 va_list args;
271 va_start(args, format);
272 vfprintf(stderr, format, args);
273 va_end(args);
274 }
275
Finalize()276 void DumpStabsHandler::Finalize() {
277 // Sort our boundary list, so we can search it quickly.
278 sort(boundaries_.begin(), boundaries_.end());
279 // Sort all functions by address, just for neatness.
280 sort(functions_.begin(), functions_.end(),
281 Module::Function::CompareByAddress);
282 for (vector<Module::Function *>::iterator func_it = functions_.begin();
283 func_it != functions_.end();
284 func_it++) {
285 Module::Function *f = *func_it;
286 // Compute the function f's size.
287 vector<Module::Address>::iterator boundary
288 = std::upper_bound(boundaries_.begin(), boundaries_.end(), f->address_);
289 if (boundary != boundaries_.end())
290 f->size_ = *boundary - f->address_;
291 else
292 // If this is the last function in the module, and the STABS
293 // reader was unable to give us its ending address, then assign
294 // it a bogus, very large value. This will happen at most once
295 // per module: since we've added all functions' addresses to the
296 // boundary table, only one can be the last.
297 f->size_ = kFallbackSize;
298
299 // Compute sizes for each of the function f's lines --- if it has any.
300 if (! f->lines_.empty()) {
301 stable_sort(f->lines_.begin(), f->lines_.end(),
302 Module::Line::CompareByAddress);
303 vector<Module::Line>::iterator last_line = f->lines_.end() - 1;
304 for (vector<Module::Line>::iterator line_it = f->lines_.begin();
305 line_it != last_line; line_it++)
306 line_it[0].size_ = line_it[1].address_ - line_it[0].address_;
307 // Compute the size of the last line from f's end address.
308 last_line->size_ = (f->address_ + f->size_) - last_line->address_;
309 }
310 }
311 // Now that everything has a size, add our functions to the module, and
312 // dispose of our private list.
313 module_->AddFunctions(functions_.begin(), functions_.end());
314 functions_.clear();
315 }
316
LoadStabs(const ElfW (Shdr)* stab_section,const ElfW (Shdr)* stabstr_section,Module * module)317 static bool LoadStabs(const ElfW(Shdr) *stab_section,
318 const ElfW(Shdr) *stabstr_section,
319 Module *module) {
320 // A callback object to handle data from the STABS reader.
321 DumpStabsHandler handler(module);
322 // Find the addresses of the STABS data, and create a STABS reader object.
323 uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset);
324 uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset);
325 google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
326 stabstr, stabstr_section->sh_size,
327 &handler);
328 // Read the STABS data, and do post-processing.
329 if (! reader.Process())
330 return false;
331 handler.Finalize();
332 return true;
333 }
334
LoadSymbols(const std::string & obj_file,ElfW (Ehdr)* elf_header,Module * module)335 static bool LoadSymbols(const std::string &obj_file, ElfW(Ehdr) *elf_header,
336 Module *module) {
337 // Translate all offsets in section headers into address.
338 FixAddress(elf_header);
339 ElfW(Addr) loading_addr = GetLoadingAddress(
340 reinterpret_cast<ElfW(Phdr) *>(elf_header->e_phoff),
341 elf_header->e_phnum);
342 module->SetLoadAddress(loading_addr);
343
344 const ElfW(Shdr) *sections =
345 reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
346 const ElfW(Shdr) *strtab = sections + elf_header->e_shstrndx;
347 bool found_some_debug_info = false;
348 const ElfW(Shdr) *stab_section
349 = FindSectionByName(".stab", sections, strtab, elf_header->e_shnum);
350 if (stab_section) {
351 const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections;
352 if (stabstr_section) {
353 found_some_debug_info = true;
354 if (! LoadStabs(stab_section, stabstr_section, module))
355 fprintf(stderr, "\".stab\" section found, but failed to load STABS"
356 " debugging information\n");
357 }
358 }
359 const ElfW(Shdr) *dwarf_section
360 = FindSectionByName(".debug_info", sections, strtab, elf_header->e_shnum);
361 if (dwarf_section) {
362 found_some_debug_info = true;
363 if (! LoadDwarf(obj_file, elf_header, module))
364 fprintf(stderr, "\".debug_info\" section found, but failed to load "
365 "DWARF debugging information\n");
366 }
367 if (! found_some_debug_info) {
368 fprintf(stderr, "file %s contains no debugging information (no \".stab\" or \".debug_info\" sections)\n", obj_file.c_str());
369 return false;
370 }
371 return true;
372 }
373
//
// FDWrapper
//
// Wrapper class to make sure opened file is closed.
//
// The wrapper closes its file descriptor on destruction, unless
// release() was called first. The value -1 stands for "no descriptor".
// Copying is disabled, because two copies would both close the same
// descriptor.
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) :
      fd_(fd) {
  }
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }
  // Return the wrapped descriptor; the wrapper keeps ownership.
  int get() const {
    return fd_;
  }
  // Return the wrapped descriptor and stop owning it: the destructor
  // will no longer close it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }
 private:
  // Not copyable. Declared but deliberately not defined.
  FDWrapper(const FDWrapper &);
  FDWrapper &operator=(const FDWrapper &);

  int fd_;
};
399
//
// MmapWrapper
//
// Wrapper class to make sure mapped regions are unmapped.
//
// The wrapper unmaps its region on destruction, unless release() was
// called first. Copying is disabled, because two copies would both unmap
// the same region.
//
class MmapWrapper {
 public:
  MmapWrapper(void *mapped_address, size_t mapped_size) :
      base_(mapped_address), size_(mapped_size) {
  }
  ~MmapWrapper() {
    if (base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }
  // Stop owning the region: the destructor will no longer unmap it.
  void release() {
    base_ = NULL;
    size_ = 0;
  }

 private:
  // Not copyable. Declared but deliberately not defined.
  MmapWrapper(const MmapWrapper &);
  MmapWrapper &operator=(const MmapWrapper &);

  void *base_;
  size_t size_;
};
425
// Return the breakpad symbol file identifier for the architecture of
// ELF_HEADER, as given by its e_machine field, or NULL if the
// architecture is not one we recognize.
const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) {
  switch (elf_header->e_machine) {
    case EM_386:
      return "x86";
    case EM_X86_64:
      return "x86_64";
    default:
      return NULL;
  }
}
437
438 // Format the Elf file identifier in IDENTIFIER as a UUID with the
439 // dashes removed.
FormatIdentifier(unsigned char identifier[16])440 std::string FormatIdentifier(unsigned char identifier[16]) {
441 char identifier_str[40];
442 google_breakpad::FileID::ConvertIdentifierToString(
443 identifier,
444 identifier_str,
445 sizeof(identifier_str));
446 std::string id_no_dash;
447 for (int i = 0; identifier_str[i] != '\0'; ++i)
448 if (identifier_str[i] != '-')
449 id_no_dash += identifier_str[i];
450 // Add an extra "0" by the end. PDB files on Windows have an 'age'
451 // number appended to the end of the file identifier; this isn't
452 // really used or necessary on other platforms, but let's preserve
453 // the pattern.
454 id_no_dash += '0';
455 return id_no_dash;
456 }
457
// Return the non-directory portion of FILENAME: the portion after the
// last slash, or the whole filename if there are no slashes. If FILENAME
// ends with a slash, the result is the empty string.
std::string BaseFileName(const std::string &filename) {
  // Search the string directly rather than going through strdup and
  // basename: this needs no temporary C copy (whose allocation could
  // fail), and does not depend on which variant of basename the included
  // headers happen to declare.
  const std::string::size_type last_slash = filename.rfind('/');
  if (last_slash == std::string::npos)
    return filename;
  return filename.substr(last_slash + 1);
}
467
468 } // namespace
469
470 namespace google_breakpad {
471
WriteSymbolFile(const std::string & obj_file,FILE * sym_file)472 bool DumpSymbols::WriteSymbolFile(const std::string &obj_file,
473 FILE *sym_file) {
474 int obj_fd = open(obj_file.c_str(), O_RDONLY);
475 if (obj_fd < 0)
476 return false;
477 FDWrapper obj_fd_wrapper(obj_fd);
478 struct stat st;
479 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0)
480 return false;
481 void *obj_base = mmap(NULL, st.st_size,
482 PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
483 if (obj_base == MAP_FAILED)
484 return false;
485 MmapWrapper map_wrapper(obj_base, st.st_size);
486 ElfW(Ehdr) *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_base);
487 if (!IsValidElf(elf_header))
488 return false;
489
490 unsigned char identifier[16];
491 google_breakpad::FileID file_id(obj_file.c_str());
492 if (! file_id.ElfFileIdentifier(identifier))
493 return false;
494
495 const char *architecture = ElfArchitecture(elf_header);
496 if (! architecture)
497 return false;
498
499 std::string name = BaseFileName(obj_file);
500 std::string os = "Linux";
501 std::string id = FormatIdentifier(identifier);
502
503 Module module(name, os, architecture, id);
504 if (!LoadSymbols(obj_file, elf_header, &module))
505 return false;
506 if (!module.Write(sym_file))
507 return false;
508
509 return true;
510 }
511
512 } // namespace google_breakpad
513