1 // Copyright (c) 2011 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31
32 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
33 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
34
35 #include "common/linux/dump_symbols.h"
36
37 #include <assert.h>
38 #include <elf.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <limits.h>
42 #include <link.h>
43 #include <stdint.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <sys/mman.h>
48 #include <sys/stat.h>
49 #include <unistd.h>
50
51 #include <iostream>
52 #include <set>
53 #include <string>
54 #include <utility>
55 #include <vector>
56
57 #include "common/dwarf/bytereader-inl.h"
58 #include "common/dwarf/dwarf2diehandler.h"
59 #include "common/dwarf_cfi_to_module.h"
60 #include "common/dwarf_cu_to_module.h"
61 #include "common/dwarf_line_to_module.h"
62 #include "common/dwarf_range_list_handler.h"
63 #include "common/linux/crc32.h"
64 #include "common/linux/eintr_wrapper.h"
65 #include "common/linux/elfutils.h"
66 #include "common/linux/elfutils-inl.h"
67 #include "common/linux/elf_symbols_to_module.h"
68 #include "common/linux/file_id.h"
69 #include "common/memory_allocator.h"
70 #include "common/module.h"
71 #include "common/path_helper.h"
72 #include "common/scoped_ptr.h"
73 #ifndef NO_STABS_SUPPORT
74 #include "common/stabs_reader.h"
75 #include "common/stabs_to_module.h"
76 #endif
77 #include "common/using_std_string.h"
78
79 // This namespace contains helper functions.
80 namespace {
81
82 using google_breakpad::DumpOptions;
83 using google_breakpad::DwarfCFIToModule;
84 using google_breakpad::DwarfCUToModule;
85 using google_breakpad::DwarfLineToModule;
86 using google_breakpad::DwarfRangeListHandler;
87 using google_breakpad::ElfClass;
88 using google_breakpad::ElfClass32;
89 using google_breakpad::ElfClass64;
90 using google_breakpad::FileID;
91 using google_breakpad::FindElfSectionByName;
92 using google_breakpad::GetOffset;
93 using google_breakpad::IsValidElf;
94 using google_breakpad::kDefaultBuildIdSize;
95 using google_breakpad::Module;
96 using google_breakpad::PageAllocator;
97 #ifndef NO_STABS_SUPPORT
98 using google_breakpad::StabsToModule;
99 #endif
100 using google_breakpad::scoped_ptr;
101 using google_breakpad::wasteful_vector;
102
103 // Define AARCH64 ELF architecture if host machine does not include this define.
104 #ifndef EM_AARCH64
105 #define EM_AARCH64 183
106 #endif
107
//
// FDWrapper
//
// Scope guard for a file descriptor: the descriptor passed to the
// constructor is closed when the wrapper is destroyed, unless release()
// was called first. The value -1 means "no descriptor held".
//
class FDWrapper {
 public:
  // Takes ownership of |fd|.
  explicit FDWrapper(int fd) : fd_(fd) {}

  // Closes the held descriptor, if there is one.
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Returns the held descriptor (or -1) without giving up ownership.
  int get() const {
    return fd_;
  }

  // Gives up ownership: returns the held descriptor and leaves the wrapper
  // holding -1, so the destructor will not close anything.
  int release() {
    const int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  // Copying would make two wrappers close the same descriptor, so the copy
  // operations are declared private and intentionally left undefined.
  FDWrapper(const FDWrapper&);
  FDWrapper& operator=(const FDWrapper&);

  int fd_;
};
132
//
// MmapWrapper
//
// Scope guard for a memory mapping: a region registered with set() is
// passed to munmap() when the wrapper is destroyed, unless release() was
// called first.
//
class MmapWrapper {
 public:
  // Starts out holding no mapping.
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

  // Unmaps the registered region, if any.
  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }

  // Registers the region of |mapped_size| bytes starting at
  // |mapped_address| to be unmapped on destruction. A previously registered
  // region is simply forgotten, not unmapped.
  void set(void *mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }

  // Forgets the registered region without unmapping it. Asserts that a
  // region had been registered.
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  // Copying would make two wrappers unmap the same region, so the copy
  // operations are declared private and intentionally left undefined.
  MmapWrapper(const MmapWrapper&);
  MmapWrapper& operator=(const MmapWrapper&);

  bool is_set_;   // True between set() and release().
  void* base_;    // Start of the registered region, or NULL.
  size_t size_;   // Length of the registered region in bytes.
};
164
// Return the preferred loading address of the binary whose NHEADER program
// headers start at PROGRAM_HEADERS: the p_vaddr of the first PT_LOAD
// entry, or 0 if there is no PT_LOAD entry.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  // For non-PIC executables (e_type == ET_EXEC), the load address is the
  // start address of the first PT_LOAD segment. (ELF requires the segments
  // to be sorted by load address.) For PIC executables and dynamic
  // libraries (e_type == ET_DYN), this address will normally be zero.
  const Phdr* segment = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++segment) {
    if (segment->p_type == PT_LOAD)
      return segment->p_vaddr;
  }

  // No loadable segment at all.
  return 0;
}
184
185 // Find the set of address ranges for all PT_LOAD segments.
186 template <typename ElfClass>
GetPtLoadSegmentRanges(const typename ElfClass::Phdr * program_headers,int nheader)187 vector<Module::Range> GetPtLoadSegmentRanges(
188 const typename ElfClass::Phdr* program_headers,
189 int nheader) {
190 typedef typename ElfClass::Phdr Phdr;
191 vector<Module::Range> ranges;
192
193 for (int i = 0; i < nheader; ++i) {
194 const Phdr& header = program_headers[i];
195 if (header.p_type == PT_LOAD) {
196 ranges.push_back(Module::Range(header.p_vaddr, header.p_memsz));
197 }
198 }
199 return ranges;
200 }
201
202 #ifndef NO_STABS_SUPPORT
203 template<typename ElfClass>
LoadStabs(const typename ElfClass::Ehdr * elf_header,const typename ElfClass::Shdr * stab_section,const typename ElfClass::Shdr * stabstr_section,const bool big_endian,Module * module)204 bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
205 const typename ElfClass::Shdr* stab_section,
206 const typename ElfClass::Shdr* stabstr_section,
207 const bool big_endian,
208 Module* module) {
209 // A callback object to handle data from the STABS reader.
210 StabsToModule handler(module);
211 // Find the addresses of the STABS data, and create a STABS reader object.
212 // On Linux, STABS entries always have 32-bit values, regardless of the
213 // address size of the architecture whose code they're describing, and
214 // the strings are always "unitized".
215 const uint8_t* stabs =
216 GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset);
217 const uint8_t* stabstr =
218 GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset);
219 google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
220 stabstr, stabstr_section->sh_size,
221 big_endian, 4, true, &handler);
222 // Read the STABS data, and do post-processing.
223 if (!reader.Process())
224 return false;
225 handler.Finalize();
226 return true;
227 }
228 #endif // NO_STABS_SUPPORT
229
230 // A range handler that accepts rangelist data parsed by
231 // dwarf2reader::RangeListReader and populates a range vector (typically
232 // owned by a function) with the results.
233 class DumperRangesHandler : public DwarfCUToModule::RangesHandler {
234 public:
DumperRangesHandler(const uint8_t * buffer,uint64 size,dwarf2reader::ByteReader * reader)235 DumperRangesHandler(const uint8_t *buffer, uint64 size,
236 dwarf2reader::ByteReader* reader)
237 : buffer_(buffer), size_(size), reader_(reader) { }
238
ReadRanges(uint64 offset,Module::Address base_address,vector<Module::Range> * ranges)239 bool ReadRanges(uint64 offset, Module::Address base_address,
240 vector<Module::Range>* ranges) {
241 DwarfRangeListHandler handler(base_address, ranges);
242 dwarf2reader::RangeListReader rangelist_reader(buffer_, size_, reader_,
243 &handler);
244
245 return rangelist_reader.ReadRangeList(offset);
246 }
247
248 private:
249 const uint8_t *buffer_;
250 uint64 size_;
251 dwarf2reader::ByteReader* reader_;
252 };
253
254 // A line-to-module loader that accepts line number info parsed by
255 // dwarf2reader::LineInfo and populates a Module and a line vector
256 // with the results.
257 class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
258 public:
259 // Create a line-to-module converter using BYTE_READER.
DumperLineToModule(dwarf2reader::ByteReader * byte_reader)260 explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
261 : byte_reader_(byte_reader) { }
StartCompilationUnit(const string & compilation_dir)262 void StartCompilationUnit(const string& compilation_dir) {
263 compilation_dir_ = compilation_dir;
264 }
ReadProgram(const uint8_t * program,uint64 length,Module * module,std::vector<Module::Line> * lines)265 void ReadProgram(const uint8_t *program, uint64 length,
266 Module* module, std::vector<Module::Line>* lines) {
267 DwarfLineToModule handler(module, compilation_dir_, lines);
268 dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
269 parser.Start();
270 }
271 private:
272 string compilation_dir_;
273 dwarf2reader::ByteReader *byte_reader_;
274 };
275
276 template<typename ElfClass>
LoadDwarf(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const bool big_endian,bool handle_inter_cu_refs,Module * module)277 bool LoadDwarf(const string& dwarf_filename,
278 const typename ElfClass::Ehdr* elf_header,
279 const bool big_endian,
280 bool handle_inter_cu_refs,
281 Module* module) {
282 typedef typename ElfClass::Shdr Shdr;
283
284 const dwarf2reader::Endianness endianness = big_endian ?
285 dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
286 dwarf2reader::ByteReader byte_reader(endianness);
287
288 // Construct a context for this file.
289 DwarfCUToModule::FileContext file_context(dwarf_filename,
290 module,
291 handle_inter_cu_refs);
292
293 // Build a map of the ELF file's sections.
294 const Shdr* sections =
295 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
296 int num_sections = elf_header->e_shnum;
297 const Shdr* section_names = sections + elf_header->e_shstrndx;
298 for (int i = 0; i < num_sections; i++) {
299 const Shdr* section = §ions[i];
300 string name = GetOffset<ElfClass, char>(elf_header,
301 section_names->sh_offset) +
302 section->sh_name;
303 const uint8_t *contents = GetOffset<ElfClass, uint8_t>(elf_header,
304 section->sh_offset);
305 file_context.AddSectionToSectionMap(name, contents, section->sh_size);
306 }
307
308 // Optional .debug_ranges reader
309 scoped_ptr<DumperRangesHandler> ranges_handler;
310 dwarf2reader::SectionMap::const_iterator ranges_entry =
311 file_context.section_map().find(".debug_ranges");
312 if (ranges_entry != file_context.section_map().end()) {
313 const std::pair<const uint8_t *, uint64>& ranges_section =
314 ranges_entry->second;
315 ranges_handler.reset(
316 new DumperRangesHandler(ranges_section.first, ranges_section.second,
317 &byte_reader));
318 }
319
320 // Parse all the compilation units in the .debug_info section.
321 DumperLineToModule line_to_module(&byte_reader);
322 dwarf2reader::SectionMap::const_iterator debug_info_entry =
323 file_context.section_map().find(".debug_info");
324 assert(debug_info_entry != file_context.section_map().end());
325 const std::pair<const uint8_t *, uint64>& debug_info_section =
326 debug_info_entry->second;
327 // This should never have been called if the file doesn't have a
328 // .debug_info section.
329 assert(debug_info_section.first);
330 uint64 debug_info_length = debug_info_section.second;
331 for (uint64 offset = 0; offset < debug_info_length;) {
332 // Make a handler for the root DIE that populates MODULE with the
333 // data that was found.
334 DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
335 DwarfCUToModule root_handler(&file_context, &line_to_module,
336 ranges_handler.get(), &reporter);
337 // Make a Dwarf2Handler that drives the DIEHandler.
338 dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
339 // Make a DWARF parser for the compilation unit at OFFSET.
340 dwarf2reader::CompilationUnit reader(dwarf_filename,
341 file_context.section_map(),
342 offset,
343 &byte_reader,
344 &die_dispatcher);
345 // Process the entire compilation unit; get the offset of the next.
346 offset += reader.Start();
347 }
348 return true;
349 }
350
351 // Fill REGISTER_NAMES with the register names appropriate to the
352 // machine architecture given in HEADER, indexed by the register
353 // numbers used in DWARF call frame information. Return true on
354 // success, or false if HEADER's machine architecture is not
355 // supported.
356 template<typename ElfClass>
DwarfCFIRegisterNames(const typename ElfClass::Ehdr * elf_header,std::vector<string> * register_names)357 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
358 std::vector<string>* register_names) {
359 switch (elf_header->e_machine) {
360 case EM_386:
361 *register_names = DwarfCFIToModule::RegisterNames::I386();
362 return true;
363 case EM_ARM:
364 *register_names = DwarfCFIToModule::RegisterNames::ARM();
365 return true;
366 case EM_AARCH64:
367 *register_names = DwarfCFIToModule::RegisterNames::ARM64();
368 return true;
369 case EM_MIPS:
370 *register_names = DwarfCFIToModule::RegisterNames::MIPS();
371 return true;
372 case EM_X86_64:
373 *register_names = DwarfCFIToModule::RegisterNames::X86_64();
374 return true;
375 default:
376 return false;
377 }
378 }
379
380 template<typename ElfClass>
LoadDwarfCFI(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const char * section_name,const typename ElfClass::Shdr * section,const bool eh_frame,const typename ElfClass::Shdr * got_section,const typename ElfClass::Shdr * text_section,const bool big_endian,Module * module)381 bool LoadDwarfCFI(const string& dwarf_filename,
382 const typename ElfClass::Ehdr* elf_header,
383 const char* section_name,
384 const typename ElfClass::Shdr* section,
385 const bool eh_frame,
386 const typename ElfClass::Shdr* got_section,
387 const typename ElfClass::Shdr* text_section,
388 const bool big_endian,
389 Module* module) {
390 // Find the appropriate set of register names for this file's
391 // architecture.
392 std::vector<string> register_names;
393 if (!DwarfCFIRegisterNames<ElfClass>(elf_header, ®ister_names)) {
394 fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
395 " cannot convert DWARF call frame information\n",
396 dwarf_filename.c_str(), elf_header->e_machine);
397 return false;
398 }
399
400 const dwarf2reader::Endianness endianness = big_endian ?
401 dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
402
403 // Find the call frame information and its size.
404 const uint8_t *cfi =
405 GetOffset<ElfClass, uint8_t>(elf_header, section->sh_offset);
406 size_t cfi_size = section->sh_size;
407
408 // Plug together the parser, handler, and their entourages.
409 DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
410 DwarfCFIToModule handler(module, register_names, &module_reporter);
411 dwarf2reader::ByteReader byte_reader(endianness);
412
413 byte_reader.SetAddressSize(ElfClass::kAddrSize);
414
415 // Provide the base addresses for .eh_frame encoded pointers, if
416 // possible.
417 byte_reader.SetCFIDataBase(section->sh_addr, cfi);
418 if (got_section)
419 byte_reader.SetDataBase(got_section->sh_addr);
420 if (text_section)
421 byte_reader.SetTextBase(text_section->sh_addr);
422
423 dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
424 section_name);
425 dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
426 &byte_reader, &handler, &dwarf_reporter,
427 eh_frame);
428 parser.Start();
429 return true;
430 }
431
LoadELF(const string & obj_file,MmapWrapper * map_wrapper,void ** elf_header)432 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
433 void** elf_header) {
434 int obj_fd = open(obj_file.c_str(), O_RDONLY);
435 if (obj_fd < 0) {
436 fprintf(stderr, "Failed to open ELF file '%s': %s\n",
437 obj_file.c_str(), strerror(errno));
438 return false;
439 }
440 FDWrapper obj_fd_wrapper(obj_fd);
441 struct stat st;
442 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
443 fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
444 obj_file.c_str(), strerror(errno));
445 return false;
446 }
447 void* obj_base = mmap(NULL, st.st_size,
448 PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
449 if (obj_base == MAP_FAILED) {
450 fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
451 obj_file.c_str(), strerror(errno));
452 return false;
453 }
454 map_wrapper->set(obj_base, st.st_size);
455 *elf_header = obj_base;
456 if (!IsValidElf(*elf_header)) {
457 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
458 return false;
459 }
460 return true;
461 }
462
// Decode the data-encoding byte (e_ident[EI_DATA]) of ELF_HEADER. For
// ELFDATA2LSB, store false in *BIG_ENDIAN and return true; for
// ELFDATA2MSB, store true and return true. For any other value, print a
// message to stderr, leave *BIG_ENDIAN untouched and return false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  switch (elf_header->e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n",
          elf_header->e_ident[EI_DATA]);
  return false;
}
480
// Resolve |right_path| with realpath() and report whether the result is
// string-equal to |left_abspath|, which the caller must already have
// resolved. Returns false if |right_path| cannot be resolved.
bool IsSameFile(const char* left_abspath, const std::string& right_path) {
  char resolved_right[PATH_MAX];
  const bool resolved =
      realpath(right_path.c_str(), resolved_right) != NULL;
  return resolved && strcmp(left_abspath, resolved_right) == 0;
}
489
490 // Read the .gnu_debuglink and get the debug file name. If anything goes
491 // wrong, return an empty string.
ReadDebugLink(const uint8_t * debuglink,const size_t debuglink_size,const bool big_endian,const string & obj_file,const std::vector<string> & debug_dirs)492 string ReadDebugLink(const uint8_t *debuglink,
493 const size_t debuglink_size,
494 const bool big_endian,
495 const string& obj_file,
496 const std::vector<string>& debug_dirs) {
497 // Include '\0' + CRC32 (4 bytes).
498 size_t debuglink_len = strlen(reinterpret_cast<const char *>(debuglink)) + 5;
499 debuglink_len = 4 * ((debuglink_len + 3) / 4); // Round up to 4 bytes.
500
501 // Sanity check.
502 if (debuglink_len != debuglink_size) {
503 fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
504 "%zx %zx\n", debuglink_len, debuglink_size);
505 return string();
506 }
507
508 char obj_file_abspath[PATH_MAX];
509 if (!realpath(obj_file.c_str(), obj_file_abspath)) {
510 fprintf(stderr, "Cannot resolve absolute path for %s\n", obj_file.c_str());
511 return string();
512 }
513
514 std::vector<string> searched_paths;
515 string debuglink_path;
516 std::vector<string>::const_iterator it;
517 for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) {
518 const string& debug_dir = *it;
519 debuglink_path = debug_dir + "/" +
520 reinterpret_cast<const char *>(debuglink);
521
522 // There is the annoying case of /path/to/foo.so having foo.so as the
523 // debug link file name. Thus this may end up opening /path/to/foo.so again,
524 // and there is a small chance of the two files having the same CRC.
525 if (IsSameFile(obj_file_abspath, debuglink_path))
526 continue;
527
528 searched_paths.push_back(debug_dir);
529 int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
530 if (debuglink_fd < 0)
531 continue;
532
533 FDWrapper debuglink_fd_wrapper(debuglink_fd);
534
535 // The CRC is the last 4 bytes in |debuglink|.
536 const dwarf2reader::Endianness endianness = big_endian ?
537 dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
538 dwarf2reader::ByteReader byte_reader(endianness);
539 uint32_t expected_crc =
540 byte_reader.ReadFourBytes(&debuglink[debuglink_size - 4]);
541
542 uint32_t actual_crc = 0;
543 while (true) {
544 const size_t kReadSize = 4096;
545 char buf[kReadSize];
546 ssize_t bytes_read = HANDLE_EINTR(read(debuglink_fd, &buf, kReadSize));
547 if (bytes_read < 0) {
548 fprintf(stderr, "Error reading debug ELF file %s.\n",
549 debuglink_path.c_str());
550 return string();
551 }
552 if (bytes_read == 0)
553 break;
554 actual_crc = google_breakpad::UpdateCrc32(actual_crc, buf, bytes_read);
555 }
556 if (actual_crc != expected_crc) {
557 fprintf(stderr, "Error reading debug ELF file - CRC32 mismatch: %s\n",
558 debuglink_path.c_str());
559 continue;
560 }
561
562 // Found debug file.
563 return debuglink_path;
564 }
565
566 // Not found case.
567 fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
568 obj_file.c_str());
569 for (it = searched_paths.begin(); it < searched_paths.end(); ++it) {
570 const string& debug_dir = *it;
571 fprintf(stderr, " %s/%s\n", debug_dir.c_str(), debuglink);
572 }
573 return string();
574 }
575
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // DBG_DIRS is stored by reference, so the vector must outlive this
  // object.
  explicit LoadSymbolsInfo(const std::vector<std::string>& dbg_dirs) :
    debug_dirs_(dbg_dirs),
    has_loading_addr_(false),
    loading_addr_(0) {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files. Recording a
  // section a second time only prints a message to stderr.
  void LoadedSection(const std::string &section) {
    // insert() reports through .second whether the name was newly added.
    if (!loaded_sections_.insert(section).second) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // The ELF file and linked debug file are expected to have the same preferred
  // loading address. The first call records ADDR and FILENAME; every later
  // call compares its ADDR with the recorded one and, on a mismatch,
  // prints a message to stderr and asserts.
  void set_loading_addr(Addr addr, const std::string &filename) {
    if (!has_loading_addr_) {
      loading_addr_ = addr;
      loaded_file_ = filename;
      // Mark the address as recorded; without this every call would look
      // like the first one and the comparison below would never run.
      has_loading_addr_ = true;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::vector<std::string>& debug_dirs() const {
    return debug_dirs_;
  }

  std::string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(std::string file) {
    debuglink_file_ = file;
  }

 private:
  const std::vector<std::string>& debug_dirs_;  // Directories in which to
                                                // search for the debug ELF
                                                // file.

  std::string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  Addr loading_addr_;  // Saves the preferred loading address from the
                       // first call to LoadSymbols().

  std::string loaded_file_;  // Name of the file loaded from the first call
                             // to LoadSymbols().

  std::set<std::string> loaded_sections_;  // Tracks the Loaded ELF sections
                                           // between calls to LoadSymbols().
};
650
651 template<typename ElfClass>
LoadSymbols(const string & obj_file,const bool big_endian,const typename ElfClass::Ehdr * elf_header,const bool read_gnu_debug_link,LoadSymbolsInfo<ElfClass> * info,const DumpOptions & options,Module * module)652 bool LoadSymbols(const string& obj_file,
653 const bool big_endian,
654 const typename ElfClass::Ehdr* elf_header,
655 const bool read_gnu_debug_link,
656 LoadSymbolsInfo<ElfClass>* info,
657 const DumpOptions& options,
658 Module* module) {
659 typedef typename ElfClass::Addr Addr;
660 typedef typename ElfClass::Phdr Phdr;
661 typedef typename ElfClass::Shdr Shdr;
662
663 Addr loading_addr = GetLoadingAddress<ElfClass>(
664 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
665 elf_header->e_phnum);
666 module->SetLoadAddress(loading_addr);
667 info->set_loading_addr(loading_addr, obj_file);
668
669 // Allow filtering of extraneous debug information in partitioned libraries.
670 // Such libraries contain debug information for all libraries extracted from
671 // the same combined library, implying extensive duplication.
672 vector<Module::Range> address_ranges = GetPtLoadSegmentRanges<ElfClass>(
673 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
674 elf_header->e_phnum);
675 module->SetAddressRanges(address_ranges);
676
677 const Shdr* sections =
678 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
679 const Shdr* section_names = sections + elf_header->e_shstrndx;
680 const char* names =
681 GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
682 const char *names_end = names + section_names->sh_size;
683 bool found_debug_info_section = false;
684 bool found_usable_info = false;
685
686 if (options.symbol_data != ONLY_CFI) {
687 #ifndef NO_STABS_SUPPORT
688 // Look for STABS debugging information, and load it if present.
689 const Shdr* stab_section =
690 FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS,
691 sections, names, names_end,
692 elf_header->e_shnum);
693 if (stab_section) {
694 const Shdr* stabstr_section = stab_section->sh_link + sections;
695 if (stabstr_section) {
696 found_debug_info_section = true;
697 found_usable_info = true;
698 info->LoadedSection(".stab");
699 if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section,
700 big_endian, module)) {
701 fprintf(stderr, "%s: \".stab\" section found, but failed to load"
702 " STABS debugging information\n", obj_file.c_str());
703 }
704 }
705 }
706 #endif // NO_STABS_SUPPORT
707
708 // Look for DWARF debugging information, and load it if present.
709 const Shdr* dwarf_section =
710 FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS,
711 sections, names, names_end,
712 elf_header->e_shnum);
713
714 // .debug_info section type is SHT_PROGBITS for mips on pnacl toolchains,
715 // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
716 if (elf_header->e_machine == EM_MIPS && !dwarf_section) {
717 dwarf_section =
718 FindElfSectionByName<ElfClass>(".debug_info", SHT_MIPS_DWARF,
719 sections, names, names_end,
720 elf_header->e_shnum);
721 }
722
723 if (dwarf_section) {
724 found_debug_info_section = true;
725 found_usable_info = true;
726 info->LoadedSection(".debug_info");
727 if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian,
728 options.handle_inter_cu_refs, module)) {
729 fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
730 "DWARF debugging information\n", obj_file.c_str());
731 }
732 }
733
734 // See if there are export symbols available.
735 const Shdr* symtab_section =
736 FindElfSectionByName<ElfClass>(".symtab", SHT_SYMTAB,
737 sections, names, names_end,
738 elf_header->e_shnum);
739 const Shdr* strtab_section =
740 FindElfSectionByName<ElfClass>(".strtab", SHT_STRTAB,
741 sections, names, names_end,
742 elf_header->e_shnum);
743 if (symtab_section && strtab_section) {
744 info->LoadedSection(".symtab");
745
746 const uint8_t* symtab =
747 GetOffset<ElfClass, uint8_t>(elf_header,
748 symtab_section->sh_offset);
749 const uint8_t* strtab =
750 GetOffset<ElfClass, uint8_t>(elf_header,
751 strtab_section->sh_offset);
752 bool result =
753 ELFSymbolsToModule(symtab,
754 symtab_section->sh_size,
755 strtab,
756 strtab_section->sh_size,
757 big_endian,
758 ElfClass::kAddrSize,
759 module);
760 found_usable_info = found_usable_info || result;
761 } else {
762 // Look in dynsym only if full symbol table was not available.
763 const Shdr* dynsym_section =
764 FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM,
765 sections, names, names_end,
766 elf_header->e_shnum);
767 const Shdr* dynstr_section =
768 FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB,
769 sections, names, names_end,
770 elf_header->e_shnum);
771 if (dynsym_section && dynstr_section) {
772 info->LoadedSection(".dynsym");
773
774 const uint8_t* dynsyms =
775 GetOffset<ElfClass, uint8_t>(elf_header,
776 dynsym_section->sh_offset);
777 const uint8_t* dynstrs =
778 GetOffset<ElfClass, uint8_t>(elf_header,
779 dynstr_section->sh_offset);
780 bool result =
781 ELFSymbolsToModule(dynsyms,
782 dynsym_section->sh_size,
783 dynstrs,
784 dynstr_section->sh_size,
785 big_endian,
786 ElfClass::kAddrSize,
787 module);
788 found_usable_info = found_usable_info || result;
789 }
790 }
791 }
792
793 if (options.symbol_data != NO_CFI) {
794 // Dwarf Call Frame Information (CFI) is actually independent from
795 // the other DWARF debugging information, and can be used alone.
796 const Shdr* dwarf_cfi_section =
797 FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
798 sections, names, names_end,
799 elf_header->e_shnum);
800
801 // .debug_frame section type is SHT_PROGBITS for mips on pnacl toolchains,
802 // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
803 if (elf_header->e_machine == EM_MIPS && !dwarf_cfi_section) {
804 dwarf_cfi_section =
805 FindElfSectionByName<ElfClass>(".debug_frame", SHT_MIPS_DWARF,
806 sections, names, names_end,
807 elf_header->e_shnum);
808 }
809
810 if (dwarf_cfi_section) {
811 // Ignore the return value of this function; even without call frame
812 // information, the other debugging information could be perfectly
813 // useful.
814 info->LoadedSection(".debug_frame");
815 bool result =
816 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
817 dwarf_cfi_section, false, 0, 0, big_endian,
818 module);
819 found_usable_info = found_usable_info || result;
820 }
821
822 // Linux C++ exception handling information can also provide
823 // unwinding data.
824 const Shdr* eh_frame_section =
825 FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
826 sections, names, names_end,
827 elf_header->e_shnum);
828 if (eh_frame_section) {
829 // Pointers in .eh_frame data may be relative to the base addresses of
830 // certain sections. Provide those sections if present.
831 const Shdr* got_section =
832 FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
833 sections, names, names_end,
834 elf_header->e_shnum);
835 const Shdr* text_section =
836 FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
837 sections, names, names_end,
838 elf_header->e_shnum);
839 info->LoadedSection(".eh_frame");
840 // As above, ignore the return value of this function.
841 bool result =
842 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
843 eh_frame_section, true,
844 got_section, text_section, big_endian, module);
845 found_usable_info = found_usable_info || result;
846 }
847 }
848
849 if (!found_debug_info_section) {
850 fprintf(stderr, "%s: file contains no debugging information"
851 " (no \".stab\" or \".debug_info\" sections)\n",
852 obj_file.c_str());
853
854 // Failed, but maybe there's a .gnu_debuglink section?
855 if (read_gnu_debug_link) {
856 const Shdr* gnu_debuglink_section
857 = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS,
858 sections, names,
859 names_end, elf_header->e_shnum);
860 if (gnu_debuglink_section) {
861 if (!info->debug_dirs().empty()) {
862 const uint8_t *debuglink_contents =
863 GetOffset<ElfClass, uint8_t>(elf_header,
864 gnu_debuglink_section->sh_offset);
865 string debuglink_file =
866 ReadDebugLink(debuglink_contents,
867 gnu_debuglink_section->sh_size,
868 big_endian,
869 obj_file,
870 info->debug_dirs());
871 info->set_debuglink_file(debuglink_file);
872 } else {
873 fprintf(stderr, ".gnu_debuglink section found in '%s', "
874 "but no debug path specified.\n", obj_file.c_str());
875 }
876 } else {
877 fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
878 obj_file.c_str());
879 }
880 } else {
881 // Return true if some usable information was found, since the caller
882 // doesn't want to use .gnu_debuglink.
883 return found_usable_info;
884 }
885
886 // No debug info was found, let the user try again with .gnu_debuglink
887 // if present.
888 return false;
889 }
890
891 return true;
892 }
893
// Translate the e_machine field of ELF_HEADER into the architecture name
// used in Breakpad symbol files. Yields NULL when the machine type has no
// entry in the table below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  // One row per ELF machine type that has a Breakpad architecture name.
  struct MachineName {
    int machine;
    const char* name;
  };
  static const MachineName kKnownMachines[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_AARCH64, "arm64" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
  };

  const Half machine = elf_header->e_machine;
  const size_t known_count =
      sizeof(kKnownMachines) / sizeof(kKnownMachines[0]);
  for (size_t i = 0; i < known_count; ++i) {
    if (kKnownMachines[i].machine == machine)
      return kKnownMachines[i].name;
  }
  return NULL;
}
914
915 template<typename ElfClass>
SanitizeDebugFile(const typename ElfClass::Ehdr * debug_elf_header,const string & debuglink_file,const string & obj_filename,const char * obj_file_architecture,const bool obj_file_is_big_endian)916 bool SanitizeDebugFile(const typename ElfClass::Ehdr* debug_elf_header,
917 const string& debuglink_file,
918 const string& obj_filename,
919 const char* obj_file_architecture,
920 const bool obj_file_is_big_endian) {
921 const char* debug_architecture =
922 ElfArchitecture<ElfClass>(debug_elf_header);
923 if (!debug_architecture) {
924 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
925 debuglink_file.c_str(), debug_elf_header->e_machine);
926 return false;
927 }
928 if (strcmp(obj_file_architecture, debug_architecture)) {
929 fprintf(stderr, "%s with ELF machine architecture %s does not match "
930 "%s with ELF architecture %s\n",
931 debuglink_file.c_str(), debug_architecture,
932 obj_filename.c_str(), obj_file_architecture);
933 return false;
934 }
935 bool debug_big_endian;
936 if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
937 return false;
938 if (debug_big_endian != obj_file_is_big_endian) {
939 fprintf(stderr, "%s and %s does not match in endianness\n",
940 obj_filename.c_str(), debuglink_file.c_str());
941 return false;
942 }
943 return true;
944 }
945
946 template<typename ElfClass>
InitModuleForElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const string & obj_os,scoped_ptr<Module> & module)947 bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header,
948 const string& obj_filename,
949 const string& obj_os,
950 scoped_ptr<Module>& module) {
951 PageAllocator allocator;
952 wasteful_vector<uint8_t> identifier(&allocator, kDefaultBuildIdSize);
953 if (!FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
954 fprintf(stderr, "%s: unable to generate file identifier\n",
955 obj_filename.c_str());
956 return false;
957 }
958
959 const char *architecture = ElfArchitecture<ElfClass>(elf_header);
960 if (!architecture) {
961 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
962 obj_filename.c_str(), elf_header->e_machine);
963 return false;
964 }
965
966 char name_buf[NAME_MAX] = {};
967 std::string name = google_breakpad::ElfFileSoNameFromMappedFile(
968 elf_header, name_buf, sizeof(name_buf))
969 ? name_buf
970 : google_breakpad::BaseName(obj_filename);
971
972 // Add an extra "0" at the end. PDB files on Windows have an 'age'
973 // number appended to the end of the file identifier; this isn't
974 // really used or necessary on other platforms, but be consistent.
975 string id = FileID::ConvertIdentifierToUUIDString(identifier) + "0";
976 // This is just the raw Build ID in hex.
977 string code_id = FileID::ConvertIdentifierToString(identifier);
978
979 module.reset(new Module(name, obj_os, architecture, id, code_id));
980
981 return true;
982 }
983
984 template<typename ElfClass>
ReadSymbolDataElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** out_module)985 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
986 const string& obj_filename,
987 const string& obj_os,
988 const std::vector<string>& debug_dirs,
989 const DumpOptions& options,
990 Module** out_module) {
991 typedef typename ElfClass::Ehdr Ehdr;
992
993 *out_module = NULL;
994
995 scoped_ptr<Module> module;
996 if (!InitModuleForElfClass<ElfClass>(elf_header, obj_filename, obj_os,
997 module)) {
998 return false;
999 }
1000
1001 // Figure out what endianness this file is.
1002 bool big_endian;
1003 if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
1004 return false;
1005
1006 LoadSymbolsInfo<ElfClass> info(debug_dirs);
1007 if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
1008 !debug_dirs.empty(), &info,
1009 options, module.get())) {
1010 const string debuglink_file = info.debuglink_file();
1011 if (debuglink_file.empty())
1012 return false;
1013
1014 // Load debuglink ELF file.
1015 fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
1016 MmapWrapper debug_map_wrapper;
1017 Ehdr* debug_elf_header = NULL;
1018 if (!LoadELF(debuglink_file, &debug_map_wrapper,
1019 reinterpret_cast<void**>(&debug_elf_header)) ||
1020 !SanitizeDebugFile<ElfClass>(debug_elf_header, debuglink_file,
1021 obj_filename,
1022 module->architecture().c_str(),
1023 big_endian)) {
1024 return false;
1025 }
1026
1027 if (!LoadSymbols<ElfClass>(debuglink_file, big_endian,
1028 debug_elf_header, false, &info,
1029 options, module.get())) {
1030 return false;
1031 }
1032 }
1033
1034 *out_module = module.release();
1035 return true;
1036 }
1037
1038 } // namespace
1039
1040 namespace google_breakpad {
1041
1042 // Not explicitly exported, but not static so it can be used in unit tests.
ReadSymbolDataInternal(const uint8_t * obj_file,const string & obj_filename,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)1043 bool ReadSymbolDataInternal(const uint8_t* obj_file,
1044 const string& obj_filename,
1045 const string& obj_os,
1046 const std::vector<string>& debug_dirs,
1047 const DumpOptions& options,
1048 Module** module) {
1049 if (!IsValidElf(obj_file)) {
1050 fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
1051 return false;
1052 }
1053
1054 int elfclass = ElfClass(obj_file);
1055 if (elfclass == ELFCLASS32) {
1056 return ReadSymbolDataElfClass<ElfClass32>(
1057 reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, obj_os,
1058 debug_dirs, options, module);
1059 }
1060 if (elfclass == ELFCLASS64) {
1061 return ReadSymbolDataElfClass<ElfClass64>(
1062 reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, obj_os,
1063 debug_dirs, options, module);
1064 }
1065
1066 return false;
1067 }
1068
WriteSymbolFile(const string & load_path,const string & obj_file,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,std::ostream & sym_stream)1069 bool WriteSymbolFile(const string &load_path,
1070 const string &obj_file,
1071 const string &obj_os,
1072 const std::vector<string>& debug_dirs,
1073 const DumpOptions& options,
1074 std::ostream &sym_stream) {
1075 Module* module;
1076 if (!ReadSymbolData(load_path, obj_file, obj_os, debug_dirs, options,
1077 &module))
1078 return false;
1079
1080 bool result = module->Write(sym_stream, options.symbol_data);
1081 delete module;
1082 return result;
1083 }
1084
1085 // Read the selected object file's debugging information, and write out the
1086 // header only to |stream|. Return true on success; if an error occurs, report
1087 // it and return false.
WriteSymbolFileHeader(const string & load_path,const string & obj_file,const string & obj_os,std::ostream & sym_stream)1088 bool WriteSymbolFileHeader(const string& load_path,
1089 const string& obj_file,
1090 const string& obj_os,
1091 std::ostream &sym_stream) {
1092 MmapWrapper map_wrapper;
1093 void* elf_header = NULL;
1094 if (!LoadELF(load_path, &map_wrapper, &elf_header)) {
1095 fprintf(stderr, "Could not load ELF file: %s\n", obj_file.c_str());
1096 return false;
1097 }
1098
1099 if (!IsValidElf(elf_header)) {
1100 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
1101 return false;
1102 }
1103
1104 int elfclass = ElfClass(elf_header);
1105 scoped_ptr<Module> module;
1106 if (elfclass == ELFCLASS32) {
1107 if (!InitModuleForElfClass<ElfClass32>(
1108 reinterpret_cast<const Elf32_Ehdr*>(elf_header), obj_file, obj_os,
1109 module)) {
1110 fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1111 return false;
1112 }
1113 } else if (elfclass == ELFCLASS64) {
1114 if (!InitModuleForElfClass<ElfClass64>(
1115 reinterpret_cast<const Elf64_Ehdr*>(elf_header), obj_file, obj_os,
1116 module)) {
1117 fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1118 return false;
1119 }
1120 } else {
1121 fprintf(stderr, "Unsupported module file: %s\n", obj_file.c_str());
1122 return false;
1123 }
1124
1125 return module->Write(sym_stream, ALL_SYMBOL_DATA);
1126 }
1127
ReadSymbolData(const string & load_path,const string & obj_file,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)1128 bool ReadSymbolData(const string& load_path,
1129 const string& obj_file,
1130 const string& obj_os,
1131 const std::vector<string>& debug_dirs,
1132 const DumpOptions& options,
1133 Module** module) {
1134 MmapWrapper map_wrapper;
1135 void* elf_header = NULL;
1136 if (!LoadELF(load_path, &map_wrapper, &elf_header))
1137 return false;
1138
1139 return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
1140 obj_file, obj_os, debug_dirs, options, module);
1141 }
1142
1143 } // namespace google_breakpad
1144