1 // -*- mode: C++ -*-
2 
3 // Copyright (c) 2010 Google Inc. All Rights Reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32 
// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
// The DWARF2/3 specification can be found at
// http://dwarf.freestandards.org and should be considered required
// reading if you wish to modify the implementation.
// Only a cursory attempt is made to explain terminology that is
// used here, as it is much better explained in the standard documents.
40 #ifndef COMMON_DWARF_DWARF2READER_H__
41 #define COMMON_DWARF_DWARF2READER_H__
42 
43 #include <stdint.h>
44 
45 #include <list>
46 #include <map>
47 #include <string>
48 #include <utility>
49 #include <vector>
50 #include <memory>
51 
52 #include "common/dwarf/bytereader.h"
53 #include "common/dwarf/dwarf2enums.h"
54 #include "common/dwarf/types.h"
55 #include "common/using_std_string.h"
56 #include "common/dwarf/elf_reader.h"
57 
58 namespace dwarf2reader {
59 struct LineStateMachine;
60 class Dwarf2Handler;
61 class LineInfoHandler;
62 class DwpReader;
63 
64 // This maps from a string naming a section to a pair containing a
65 // the data for the section, and the size of the section.
66 typedef std::map<string, std::pair<const uint8_t *, uint64> > SectionMap;
67 typedef std::list<std::pair<enum DwarfAttribute, enum DwarfForm> >
68     AttributeList;
69 typedef AttributeList::iterator AttributeIterator;
70 typedef AttributeList::const_iterator ConstAttributeIterator;
71 
72 struct LineInfoHeader {
73   uint64 total_length;
74   uint16 version;
75   uint64 prologue_length;
76   uint8 min_insn_length; // insn stands for instructin
77   bool default_is_stmt; // stmt stands for statement
78   int8 line_base;
79   uint8 line_range;
80   uint8 opcode_base;
81   // Use a pointer so that signalsafe_addr2line is able to use this structure
82   // without heap allocation problem.
83   std::vector<unsigned char> *std_opcode_lengths;
84 };
85 
86 class LineInfo {
87  public:
88 
89   // Initializes a .debug_line reader. Buffer and buffer length point
90   // to the beginning and length of the line information to read.
91   // Reader is a ByteReader class that has the endianness set
92   // properly.
93   LineInfo(const uint8_t *buffer_, uint64 buffer_length,
94            ByteReader* reader, LineInfoHandler* handler);
95 
~LineInfo()96   virtual ~LineInfo() {
97     if (header_.std_opcode_lengths) {
98       delete header_.std_opcode_lengths;
99     }
100   }
101 
102   // Start processing line info, and calling callbacks in the handler.
103   // Consumes the line number information for a single compilation unit.
104   // Returns the number of bytes processed.
105   uint64 Start();
106 
107   // Process a single line info opcode at START using the state
108   // machine at LSM.  Return true if we should define a line using the
109   // current state of the line state machine.  Place the length of the
110   // opcode in LEN.
111   // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm
112   // passes the address of PC. In other words, LSM_PASSES_PC will be
113   // set to true, if the following condition is met.
114   //
115   // lsm's old address < PC <= lsm's new address
116   static bool ProcessOneOpcode(ByteReader* reader,
117                                LineInfoHandler* handler,
118                                const struct LineInfoHeader &header,
119                                const uint8_t *start,
120                                struct LineStateMachine* lsm,
121                                size_t* len,
122                                uintptr pc,
123                                bool *lsm_passes_pc);
124 
125  private:
126   // Reads the DWARF2/3 header for this line info.
127   void ReadHeader();
128 
129   // Reads the DWARF2/3 line information
130   void ReadLines();
131 
132   // The associated handler to call processing functions in
133   LineInfoHandler* handler_;
134 
135   // The associated ByteReader that handles endianness issues for us
136   ByteReader* reader_;
137 
138   // A DWARF2/3 line info header.  This is not the same size as
139   // in the actual file, as the one in the file may have a 32 bit or
140   // 64 bit lengths
141 
142   struct LineInfoHeader header_;
143 
144   // buffer is the buffer for our line info, starting at exactly where
145   // the line info to read is.  after_header is the place right after
146   // the end of the line information header.
147   const uint8_t *buffer_;
148 #ifndef NDEBUG
149   uint64 buffer_length_;
150 #endif
151   const uint8_t *after_header_;
152 };
153 
154 // This class is the main interface between the line info reader and
155 // the client.  The virtual functions inside this get called for
156 // interesting events that happen during line info reading.  The
157 // default implementation does nothing
158 
159 class LineInfoHandler {
160  public:
LineInfoHandler()161   LineInfoHandler() { }
162 
~LineInfoHandler()163   virtual ~LineInfoHandler() { }
164 
165   // Called when we define a directory.  NAME is the directory name,
166   // DIR_NUM is the directory number
DefineDir(const string & name,uint32 dir_num)167   virtual void DefineDir(const string& name, uint32 dir_num) { }
168 
169   // Called when we define a filename. NAME is the filename, FILE_NUM
170   // is the file number which is -1 if the file index is the next
171   // index after the last numbered index (this happens when files are
172   // dynamically defined by the line program), DIR_NUM is the
173   // directory index for the directory name of this file, MOD_TIME is
174   // the modification time of the file, and LENGTH is the length of
175   // the file
DefineFile(const string & name,int32 file_num,uint32 dir_num,uint64 mod_time,uint64 length)176   virtual void DefineFile(const string& name, int32 file_num,
177                           uint32 dir_num, uint64 mod_time,
178                           uint64 length) { }
179 
180   // Called when the line info reader has a new line, address pair
181   // ready for us. ADDRESS is the address of the code, LENGTH is the
182   // length of its machine code in bytes, FILE_NUM is the file number
183   // containing the code, LINE_NUM is the line number in that file for
184   // the code, and COLUMN_NUM is the column number the code starts at,
185   // if we know it (0 otherwise).
AddLine(uint64 address,uint64 length,uint32 file_num,uint32 line_num,uint32 column_num)186   virtual void AddLine(uint64 address, uint64 length,
187                        uint32 file_num, uint32 line_num, uint32 column_num) { }
188 };
189 
190 class RangeListHandler {
191  public:
RangeListHandler()192   RangeListHandler() { }
193 
~RangeListHandler()194   virtual ~RangeListHandler() { }
195 
196   // Add a range.
AddRange(uint64 begin,uint64 end)197   virtual void AddRange(uint64 begin, uint64 end) { };
198 
199   // A new base address must be set for computing the ranges' addresses.
SetBaseAddress(uint64 base_address)200   virtual void SetBaseAddress(uint64 base_address) { };
201 
202   // Finish processing the range list.
Finish()203   virtual void Finish() { };
204 };
205 
206 class RangeListReader {
207  public:
208   RangeListReader(const uint8_t *buffer, uint64 size, ByteReader *reader,
209                   RangeListHandler *handler);
210 
211   bool ReadRangeList(uint64 offset);
212 
213  private:
214   const uint8_t *buffer_;
215   uint64 size_;
216   ByteReader* reader_;
217   RangeListHandler *handler_;
218 };
219 
220 // This class is the main interface between the reader and the
221 // client.  The virtual functions inside this get called for
222 // interesting events that happen during DWARF2 reading.
223 // The default implementation skips everything.
224 class Dwarf2Handler {
225  public:
Dwarf2Handler()226   Dwarf2Handler() { }
227 
~Dwarf2Handler()228   virtual ~Dwarf2Handler() { }
229 
230   // Start to process a compilation unit at OFFSET from the beginning of the
231   // .debug_info section. Return false if you would like to skip this
232   // compilation unit.
StartCompilationUnit(uint64 offset,uint8 address_size,uint8 offset_size,uint64 cu_length,uint8 dwarf_version)233   virtual bool StartCompilationUnit(uint64 offset, uint8 address_size,
234                                     uint8 offset_size, uint64 cu_length,
235                                     uint8 dwarf_version) { return false; }
236 
237   // When processing a skeleton compilation unit, resulting from a split
238   // DWARF compilation, once the skeleton debug info has been read,
239   // the reader will call this function to ask the client if it needs
240   // the full debug info from the .dwo or .dwp file.  Return true if
241   // you need it, or false to skip processing the split debug info.
NeedSplitDebugInfo()242   virtual bool NeedSplitDebugInfo() { return true; }
243 
244   // Start to process a split compilation unit at OFFSET from the beginning of
245   // the debug_info section in the .dwp/.dwo file.  Return false if you would
246   // like to skip this compilation unit.
StartSplitCompilationUnit(uint64 offset,uint64 cu_length)247   virtual bool StartSplitCompilationUnit(uint64 offset,
248                                          uint64 cu_length) { return false; }
249 
250   // Start to process a DIE at OFFSET from the beginning of the .debug_info
251   // section. Return false if you would like to skip this DIE.
StartDIE(uint64 offset,enum DwarfTag tag)252   virtual bool StartDIE(uint64 offset, enum DwarfTag tag) { return false; }
253 
254   // Called when we have an attribute with unsigned data to give to our
255   // handler. The attribute is for the DIE at OFFSET from the beginning of the
256   // .debug_info section. Its name is ATTR, its form is FORM, and its value is
257   // DATA.
ProcessAttributeUnsigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 data)258   virtual void ProcessAttributeUnsigned(uint64 offset,
259                                         enum DwarfAttribute attr,
260                                         enum DwarfForm form,
261                                         uint64 data) { }
262 
263   // Called when we have an attribute with signed data to give to our handler.
264   // The attribute is for the DIE at OFFSET from the beginning of the
265   // .debug_info section. Its name is ATTR, its form is FORM, and its value is
266   // DATA.
ProcessAttributeSigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,int64 data)267   virtual void ProcessAttributeSigned(uint64 offset,
268                                       enum DwarfAttribute attr,
269                                       enum DwarfForm form,
270                                       int64 data) { }
271 
272   // Called when we have an attribute whose value is a reference to
273   // another DIE. The attribute belongs to the DIE at OFFSET from the
274   // beginning of the .debug_info section. Its name is ATTR, its form
275   // is FORM, and the offset of the DIE being referred to from the
276   // beginning of the .debug_info section is DATA.
ProcessAttributeReference(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 data)277   virtual void ProcessAttributeReference(uint64 offset,
278                                          enum DwarfAttribute attr,
279                                          enum DwarfForm form,
280                                          uint64 data) { }
281 
282   // Called when we have an attribute with a buffer of data to give to our
283   // handler. The attribute is for the DIE at OFFSET from the beginning of the
284   // .debug_info section. Its name is ATTR, its form is FORM, DATA points to
285   // the buffer's contents, and its length in bytes is LENGTH. The buffer is
286   // owned by the caller, not the callee, and may not persist for very long.
287   // If you want the data to be available later, it needs to be copied.
ProcessAttributeBuffer(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64 len)288   virtual void ProcessAttributeBuffer(uint64 offset,
289                                       enum DwarfAttribute attr,
290                                       enum DwarfForm form,
291                                       const uint8_t *data,
292                                       uint64 len) { }
293 
294   // Called when we have an attribute with string data to give to our handler.
295   // The attribute is for the DIE at OFFSET from the beginning of the
296   // .debug_info section. Its name is ATTR, its form is FORM, and its value is
297   // DATA.
ProcessAttributeString(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const string & data)298   virtual void ProcessAttributeString(uint64 offset,
299                                       enum DwarfAttribute attr,
300                                       enum DwarfForm form,
301                                       const string& data) { }
302 
303   // Called when we have an attribute whose value is the 64-bit signature
304   // of a type unit in the .debug_types section. OFFSET is the offset of
305   // the DIE whose attribute we're reporting. ATTR and FORM are the
306   // attribute's name and form. SIGNATURE is the type unit's signature.
ProcessAttributeSignature(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 signature)307   virtual void ProcessAttributeSignature(uint64 offset,
308                                          enum DwarfAttribute attr,
309                                          enum DwarfForm form,
310                                          uint64 signature) { }
311 
312   // Called when finished processing the DIE at OFFSET.
313   // Because DWARF2/3 specifies a tree of DIEs, you may get starts
314   // before ends of the previous DIE, as we process children before
315   // ending the parent.
EndDIE(uint64 offset)316   virtual void EndDIE(uint64 offset) { }
317 
318 };
319 
// The base of DWARF2/3 debug info is a DIE (Debugging Information
// Entry).
// DWARF groups DIEs into a tree and calls the root of this tree a
// "compilation unit".  Most of the time, there is one compilation
// unit in the .debug_info section for each file that had debug info
// generated.
// Each DIE consists of

// 1. a tag specifying a thing that is being described (e.g.
// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc.)
// 2. attributes (such as DW_AT_location for location in memory,
// DW_AT_name for name), and data for each attribute.
// 3. A flag saying whether the DIE has children or not

// In order to gain some amount of compression, the format of
// each DIE (tag name, attributes and data forms for the attributes)
// is stored in a separate table called the "abbreviation table".
// This is done because a large number of DIEs have the exact same tag
// and list of attributes, but different data for those attributes.
// As a result, the .debug_info section is just a stream of data, and
// requires reading of the .debug_abbrev section to say what the data
// means.

// As a warning to the user, it should be noted that the reason for
// using absolute offsets from the beginning of .debug_info is that
// DWARF2/3 supports referencing DIEs from other DIEs by their offset
// from either the current compilation unit start, *or* the beginning
// of the .debug_info section.  This means it is possible to reference
// a DIE in one compilation unit from a DIE in another compilation
// unit.  This style of reference is usually used to eliminate
// duplicated information that occurs across compilation
// units, such as base types, etc.  GCC 3.4+ supports this with
// -feliminate-dwarf2-dups.  Other toolchains will sometimes do
// duplicate elimination in the linker.
354 
355 class CompilationUnit {
356  public:
357 
358   // Initialize a compilation unit.  This requires a map of sections,
359   // the offset of this compilation unit in the .debug_info section, a
360   // ByteReader, and a Dwarf2Handler class to call callbacks in.
361   CompilationUnit(const string& path, const SectionMap& sections, uint64 offset,
362                   ByteReader* reader, Dwarf2Handler* handler);
~CompilationUnit()363   virtual ~CompilationUnit() {
364     if (abbrevs_) delete abbrevs_;
365   }
366 
367   // Initialize a compilation unit from a .dwo or .dwp file.
368   // In this case, we need the .debug_addr section from the
369   // executable file that contains the corresponding skeleton
370   // compilation unit.  We also inherit the Dwarf2Handler from
371   // the executable file, and call it as if we were still
372   // processing the original compilation unit.
373   void SetSplitDwarf(const uint8_t* addr_buffer, uint64 addr_buffer_length,
374                      uint64 addr_base, uint64 ranges_base, uint64 dwo_id);
375 
376   // Begin reading a Dwarf2 compilation unit, and calling the
377   // callbacks in the Dwarf2Handler
378 
379   // Return the full length of the compilation unit, including
380   // headers. This plus the starting offset passed to the constructor
381   // is the offset of the end of the compilation unit --- and the
382   // start of the next compilation unit, if there is one.
383   uint64 Start();
384 
385  private:
386 
387   // This struct represents a single DWARF2/3 abbreviation
388   // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a
389   // tag and a list of attributes, as well as the data form of each attribute.
390   struct Abbrev {
391     uint64 number;
392     enum DwarfTag tag;
393     bool has_children;
394     AttributeList attributes;
395   };
396 
397   // A DWARF2/3 compilation unit header.  This is not the same size as
398   // in the actual file, as the one in the file may have a 32 bit or
399   // 64 bit length.
400   struct CompilationUnitHeader {
401     uint64 length;
402     uint16 version;
403     uint64 abbrev_offset;
404     uint8 address_size;
405   } header_;
406 
407   // Reads the DWARF2/3 header for this compilation unit.
408   void ReadHeader();
409 
410   // Reads the DWARF2/3 abbreviations for this compilation unit
411   void ReadAbbrevs();
412 
413   // Processes a single DIE for this compilation unit and return a new
414   // pointer just past the end of it
415   const uint8_t *ProcessDIE(uint64 dieoffset,
416                             const uint8_t *start,
417                             const Abbrev& abbrev);
418 
419   // Processes a single attribute and return a new pointer just past the
420   // end of it
421   const uint8_t *ProcessAttribute(uint64 dieoffset,
422                                   const uint8_t *start,
423                                   enum DwarfAttribute attr,
424                                   enum DwarfForm form);
425 
426   // Called when we have an attribute with unsigned data to give to
427   // our handler.  The attribute is for the DIE at OFFSET from the
428   // beginning of compilation unit, has a name of ATTR, a form of
429   // FORM, and the actual data of the attribute is in DATA.
430   // If we see a DW_AT_GNU_dwo_id attribute, save the value so that
431   // we can find the debug info in a .dwo or .dwp file.
ProcessAttributeUnsigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 data)432   void ProcessAttributeUnsigned(uint64 offset,
433                                 enum DwarfAttribute attr,
434                                 enum DwarfForm form,
435                                 uint64 data) {
436     if (attr == DW_AT_GNU_dwo_id) {
437       dwo_id_ = data;
438     }
439     else if (attr == DW_AT_GNU_addr_base) {
440       addr_base_ = data;
441     }
442     else if (attr == DW_AT_GNU_ranges_base) {
443       ranges_base_ = data;
444     }
445     // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5,
446     // that base will apply to DW_AT_ranges attributes in the
447     // skeleton CU as well as in the .dwo/.dwp files.
448     else if (attr == DW_AT_ranges && is_split_dwarf_) {
449       data += ranges_base_;
450     }
451     handler_->ProcessAttributeUnsigned(offset, attr, form, data);
452   }
453 
454   // Called when we have an attribute with signed data to give to
455   // our handler.  The attribute is for the DIE at OFFSET from the
456   // beginning of compilation unit, has a name of ATTR, a form of
457   // FORM, and the actual data of the attribute is in DATA.
ProcessAttributeSigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,int64 data)458   void ProcessAttributeSigned(uint64 offset,
459                               enum DwarfAttribute attr,
460                               enum DwarfForm form,
461                               int64 data) {
462     handler_->ProcessAttributeSigned(offset, attr, form, data);
463   }
464 
465   // Called when we have an attribute with a buffer of data to give to
466   // our handler.  The attribute is for the DIE at OFFSET from the
467   // beginning of compilation unit, has a name of ATTR, a form of
468   // FORM, and the actual data of the attribute is in DATA, and the
469   // length of the buffer is LENGTH.
ProcessAttributeBuffer(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64 len)470   void ProcessAttributeBuffer(uint64 offset,
471                               enum DwarfAttribute attr,
472                               enum DwarfForm form,
473                               const uint8_t* data,
474                               uint64 len) {
475     handler_->ProcessAttributeBuffer(offset, attr, form, data, len);
476   }
477 
478   // Called when we have an attribute with string data to give to
479   // our handler.  The attribute is for the DIE at OFFSET from the
480   // beginning of compilation unit, has a name of ATTR, a form of
481   // FORM, and the actual data of the attribute is in DATA.
482   // If we see a DW_AT_GNU_dwo_name attribute, save the value so
483   // that we can find the debug info in a .dwo or .dwp file.
ProcessAttributeString(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const char * data)484   void ProcessAttributeString(uint64 offset,
485                               enum DwarfAttribute attr,
486                               enum DwarfForm form,
487                               const char* data) {
488     if (attr == DW_AT_GNU_dwo_name)
489       dwo_name_ = data;
490     handler_->ProcessAttributeString(offset, attr, form, data);
491   }
492 
493   // Processes all DIEs for this compilation unit
494   void ProcessDIEs();
495 
496   // Skips the die with attributes specified in ABBREV starting at
497   // START, and return the new place to position the stream to.
498   const uint8_t *SkipDIE(const uint8_t *start, const Abbrev& abbrev);
499 
500   // Skips the attribute starting at START, with FORM, and return the
501   // new place to position the stream to.
502   const uint8_t *SkipAttribute(const uint8_t *start, enum DwarfForm form);
503 
504   // Process the actual debug information in a split DWARF file.
505   void ProcessSplitDwarf();
506 
507   // Read the debug sections from a .dwo file.
508   void ReadDebugSectionsFromDwo(ElfReader* elf_reader,
509                                 SectionMap* sections);
510 
511   // Path of the file containing the debug information.
512   const string path_;
513 
514   // Offset from section start is the offset of this compilation unit
515   // from the beginning of the .debug_info section.
516   uint64 offset_from_section_start_;
517 
518   // buffer is the buffer for our CU, starting at .debug_info + offset
519   // passed in from constructor.
520   // after_header points to right after the compilation unit header.
521   const uint8_t *buffer_;
522   uint64 buffer_length_;
523   const uint8_t *after_header_;
524 
525   // The associated ByteReader that handles endianness issues for us
526   ByteReader* reader_;
527 
528   // The map of sections in our file to buffers containing their data
529   const SectionMap& sections_;
530 
531   // The associated handler to call processing functions in
532   Dwarf2Handler* handler_;
533 
534   // Set of DWARF2/3 abbreviations for this compilation unit.  Indexed
535   // by abbreviation number, which means that abbrevs_[0] is not
536   // valid.
537   std::vector<Abbrev>* abbrevs_;
538 
539   // String section buffer and length, if we have a string section.
540   // This is here to avoid doing a section lookup for strings in
541   // ProcessAttribute, which is in the hot path for DWARF2 reading.
542   const uint8_t *string_buffer_;
543   uint64 string_buffer_length_;
544 
545   // String offsets section buffer and length, if we have a string offsets
546   // section (.debug_str_offsets or .debug_str_offsets.dwo).
547   const uint8_t* str_offsets_buffer_;
548   uint64 str_offsets_buffer_length_;
549 
550   // Address section buffer and length, if we have an address section
551   // (.debug_addr).
552   const uint8_t* addr_buffer_;
553   uint64 addr_buffer_length_;
554 
555   // Flag indicating whether this compilation unit is part of a .dwo
556   // or .dwp file.  If true, we are reading this unit because a
557   // skeleton compilation unit in an executable file had a
558   // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute.
559   // In a .dwo file, we expect the string offsets section to
560   // have a ".dwo" suffix, and we will use the ".debug_addr" section
561   // associated with the skeleton compilation unit.
562   bool is_split_dwarf_;
563 
564   // The value of the DW_AT_GNU_dwo_id attribute, if any.
565   uint64 dwo_id_;
566 
567   // The value of the DW_AT_GNU_dwo_name attribute, if any.
568   const char* dwo_name_;
569 
570   // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute
571   // from the skeleton CU.
572   uint64 skeleton_dwo_id_;
573 
574   // The value of the DW_AT_GNU_ranges_base attribute, if any.
575   uint64 ranges_base_;
576 
577   // The value of the DW_AT_GNU_addr_base attribute, if any.
578   uint64 addr_base_;
579 
580   // True if we have already looked for a .dwp file.
581   bool have_checked_for_dwp_;
582 
583   // Path to the .dwp file.
584   string dwp_path_;
585 
586   // ByteReader for the DWP file.
587   std::unique_ptr<ByteReader> dwp_byte_reader_;
588 
589   // DWP reader.
590    std::unique_ptr<DwpReader> dwp_reader_;
591 };
592 
593 // A Reader for a .dwp file.  Supports the fetching of DWARF debug
594 // info for a given dwo_id.
595 //
596 // There are two versions of .dwp files.  In both versions, the
597 // .dwp file is an ELF file containing only debug sections.
598 // In Version 1, the file contains many copies of each debug
599 // section, one for each .dwo file that is packaged in the .dwp
600 // file, and the .debug_cu_index section maps from the dwo_id
601 // to a set of section indexes.  In Version 2, the file contains
602 // one of each debug section, and the .debug_cu_index section
603 // maps from the dwo_id to a set of offsets and lengths that
604 // identify each .dwo file's contribution to the larger sections.
605 
606 class DwpReader {
607  public:
608   DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader);
609 
610   ~DwpReader();
611 
612   // Read the CU index and initialize data members.
613   void Initialize();
614 
615   // Read the debug sections for the given dwo_id.
616   void ReadDebugSectionsForCU(uint64 dwo_id, SectionMap* sections);
617 
618  private:
619   // Search a v1 hash table for "dwo_id".  Returns the slot index
620   // where the dwo_id was found, or -1 if it was not found.
621   int LookupCU(uint64 dwo_id);
622 
623   // Search a v2 hash table for "dwo_id".  Returns the row index
624   // in the offsets and sizes tables, or 0 if it was not found.
625   uint32 LookupCUv2(uint64 dwo_id);
626 
627   // The ELF reader for the .dwp file.
628   ElfReader* elf_reader_;
629 
630   // The ByteReader for the .dwp file.
631   const ByteReader& byte_reader_;
632 
633   // Pointer to the .debug_cu_index section.
634   const char* cu_index_;
635 
636   // Size of the .debug_cu_index section.
637   size_t cu_index_size_;
638 
639   // Pointer to the .debug_str.dwo section.
640   const char* string_buffer_;
641 
642   // Size of the .debug_str.dwo section.
643   size_t string_buffer_size_;
644 
645   // Version of the .dwp file.  We support versions 1 and 2 currently.
646   int version_;
647 
648   // Number of columns in the section tables (version 2).
649   unsigned int ncolumns_;
650 
651   // Number of units in the section tables (version 2).
652   unsigned int nunits_;
653 
654   // Number of slots in the hash table.
655   unsigned int nslots_;
656 
657   // Pointer to the beginning of the hash table.
658   const char* phash_;
659 
660   // Pointer to the beginning of the index table.
661   const char* pindex_;
662 
663   // Pointer to the beginning of the section index pool (version 1).
664   const char* shndx_pool_;
665 
666   // Pointer to the beginning of the section offset table (version 2).
667   const char* offset_table_;
668 
669   // Pointer to the beginning of the section size table (version 2).
670   const char* size_table_;
671 
672   // Contents of the sections of interest (version 2).
673   const char* abbrev_data_;
674   size_t abbrev_size_;
675   const char* info_data_;
676   size_t info_size_;
677   const char* str_offsets_data_;
678   size_t str_offsets_size_;
679 };
680 
681 // This class is a reader for DWARF's Call Frame Information.  CFI
682 // describes how to unwind stack frames --- even for functions that do
683 // not follow fixed conventions for saving registers, whose frame size
684 // varies as they execute, etc.
685 //
686 // CFI describes, at each machine instruction, how to compute the
687 // stack frame's base address, how to find the return address, and
688 // where to find the saved values of the caller's registers (if the
689 // callee has stashed them somewhere to free up the registers for its
690 // own use).
691 //
692 // For example, suppose we have a function whose machine code looks
693 // like this (imagine an assembly language that looks like C, for a
694 // machine with 32-bit registers, and a stack that grows towards lower
695 // addresses):
696 //
697 // func:                                ; entry point; return address at sp
698 // func+0:      sp = sp - 16            ; allocate space for stack frame
699 // func+1:      sp[12] = r0             ; save r0 at sp+12
700 // ...                                  ; other code, not frame-related
701 // func+10:     sp -= 4; *sp = x        ; push some x on the stack
702 // ...                                  ; other code, not frame-related
703 // func+20:     r0 = sp[16]             ; restore saved r0
704 // func+21:     sp += 20                ; pop whole stack frame
705 // func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
706 //
707 // DWARF CFI is (a very compressed representation of) a table with a
708 // row for each machine instruction address and a column for each
709 // register showing how to restore it, if possible.
710 //
711 // A special column named "CFA", for "Canonical Frame Address", tells how
712 // to compute the base address of the frame; registers' entries may
713 // refer to the CFA in describing where the registers are saved.
714 //
715 // Another special column, named "RA", represents the return address.
716 //
717 // For example, here is a complete (uncompressed) table describing the
718 // function above:
719 //
720 //     insn      cfa    r0      r1 ...  ra
721 //     =======================================
722 //     func+0:   sp                     cfa[0]
723 //     func+1:   sp+16                  cfa[0]
724 //     func+2:   sp+16  cfa[-4]         cfa[0]
725 //     func+11:  sp+20  cfa[-4]         cfa[0]
726 //     func+21:  sp+20                  cfa[0]
727 //     func+22:  sp                     cfa[0]
728 //
729 // Some things to note here:
730 //
731 // - Each row describes the state of affairs *before* executing the
732 //   instruction at the given address.  Thus, the row for func+0
733 //   describes the state before we allocate the stack frame.  In the
734 //   next row, the formula for computing the CFA has changed,
735 //   reflecting that allocation.
736 //
737 // - The other entries are written in terms of the CFA; this allows
738 //   them to remain unchanged as the stack pointer gets bumped around.
739 //   For example, the rule for recovering the return address (the "ra"
740 //   column) remains unchanged throughout the function, even as the
741 //   stack pointer takes on three different offsets from the return
742 //   address.
743 //
744 // - Although we haven't shown it, most calling conventions designate
745 //   "callee-saves" and "caller-saves" registers. The callee must
746 //   preserve the values of callee-saves registers; if it uses them,
747 //   it must save their original values somewhere, and restore them
748 //   before it returns. In contrast, the callee is free to trash
749 //   caller-saves registers; if the callee uses these, it will
750 //   probably not bother to save them anywhere, and the CFI will
751 //   probably mark their values as "unrecoverable".
752 //
753 //   (However, since the caller cannot assume the callee was going to
754 //   save them, caller-saves registers are probably dead in the caller
755   //   anyway, so compilers usually don't generate CFI for caller-saves
756 //   registers.)
757 //
758 // - Exactly where the CFA points is a matter of convention that
759 //   depends on the architecture and ABI in use. In the example, the
760 //   CFA is the value the stack pointer had upon entry to the
761 //   function, pointing at the saved return address. But on the x86,
762 //   the call frame information generated by GCC follows the
763 //   convention that the CFA is the address *after* the saved return
764 //   address.
765 //
766 //   But by definition, the CFA remains constant throughout the
767 //   lifetime of the frame. This makes it a useful value for other
768   //   columns to refer to. It also gives debuggers a useful handle
769 //   for identifying a frame.
770 //
771 // If you look at the table above, you'll notice that a given entry is
772 // often the same as the one immediately above it: most instructions
773 // change only one or two aspects of the stack frame, if they affect
774 // it at all. The DWARF format takes advantage of this fact, and
775 // reduces the size of the data by mentioning only the addresses and
776 // columns at which changes take place. So for the above, DWARF CFI
777 // data would only actually mention the following:
778 //
779 //     insn      cfa    r0      r1 ...  ra
780 //     =======================================
781 //     func+0:   sp                     cfa[0]
782 //     func+1:   sp+16
783 //     func+2:          cfa[-4]
784 //     func+11:  sp+20
785 //     func+21:         r0
786 //     func+22:  sp
787 //
788 // In fact, this is the way the parser reports CFI to the consumer: as
789 // a series of statements of the form, "At address X, column Y changed
790 // to Z," and related conventions for describing the initial state.
791 //
792 // Naturally, it would be impractical to have to scan the entire
793 // program's CFI, noting changes as we go, just to recover the
794 // unwinding rules in effect at one particular instruction. To avoid
795 // this, CFI data is grouped into "entries", each of which covers a
796 // specified range of addresses and begins with a complete statement
797 // of the rules for all recoverable registers at that starting
798 // address. Each entry typically covers a single function.
799 //
800 // Thus, to compute the contents of a given row of the table --- that
801 // is, rules for recovering the CFA, RA, and registers at a given
802 // instruction --- the consumer should find the entry that covers that
803 // instruction's address, start with the initial state supplied at the
804 // beginning of the entry, and work forward until it has processed all
805 // the changes up to and including those for the present instruction.
806 //
807 // There are seven kinds of rules that can appear in an entry of the
808 // table:
809 //
810 // - "undefined": The given register is not preserved by the callee;
811 //   its value cannot be recovered.
812 //
813   // - "same value": This register still has the value it had in the caller.
814 //
815 // - offset(N): The register is saved at offset N from the CFA.
816 //
817 // - val_offset(N): The value the register had in the caller is the
818 //   CFA plus offset N. (This is usually only useful for describing
819 //   the stack pointer.)
820 //
821 // - register(R): The register's value was saved in another register R.
822 //
823 // - expression(E): Evaluating the DWARF expression E using the
824 //   current frame's registers' values yields the address at which the
825 //   register was saved.
826 //
827 // - val_expression(E): Evaluating the DWARF expression E using the
828 //   current frame's registers' values yields the value the register
829 //   had in the caller.
830 
831 class CallFrameInfo {
832  public:
833   // The different kinds of entries one finds in CFI. Used internally,
834   // and for error reporting.
835   enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
836 
837   // The handler class to which the parser hands the parsed call frame
838   // information.  Defined below.
839   class Handler;
840 
841   // A reporter class, which CallFrameInfo uses to report errors
842   // encountered while parsing call frame information.  Defined below.
843   class Reporter;
844 
845   // Create a DWARF CFI parser. BUFFER points to the contents of the
846   // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
847   // REPORTER is an error reporter the parser should use to report
848   // problems. READER is a ByteReader instance that has the endianness and
849   // address size set properly. Report the data we find to HANDLER.
850   //
851   // This class can also parse Linux C++ exception handling data, as found
852   // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
853   // placed in loadable segments so that it is present in the program's
854   // address space, and is interpreted by the C++ runtime to search the
855   // call stack for a handler interested in the exception being thrown,
856   // actually pop the frames, and find cleanup code to run.
857   //
858   // There are two differences between the call frame information described
859   // in the DWARF standard and the exception handling data Linux places in
860   // the .eh_frame section:
861   //
862   // - Exception handling data uses a different format for call frame
863   //   information entry headers. The distinguished CIE id, the way FDEs
864   //   refer to their CIEs, and the way the end of the series of entries is
865   //   determined are all slightly different.
866   //
867   //   If the constructor's EH_FRAME argument is true, then the
868   //   CallFrameInfo parses the entry headers as Linux C++ exception
869   //   handling data. If EH_FRAME is false or omitted, the CallFrameInfo
870   //   parses standard DWARF call frame information.
871   //
872   // - Linux C++ exception handling data uses CIE augmentation strings
873   //   beginning with 'z' to specify the presence of additional data after
874   //   the CIE and FDE headers and special encodings used for addresses in
875   //   frame description entries.
876   //
877   //   CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
878   //   exception handling data if you have supplied READER with the base
879   //   addresses needed to interpret the pointer encodings that 'z'
880   //   augmentations can specify. See the ByteReader interface for details
881   //   about the base addresses. See the CallFrameInfo::Handler interface
882   //   for details about the additional information one might find in
883   //   'z'-augmented data.
884   //
885   // Thus:
886   //
887   // - If you are parsing standard DWARF CFI, as found in a .debug_frame
888   //   section, you should pass false for the EH_FRAME argument, or omit
889   //   it, and you need not worry about providing READER with the
890   //   additional base addresses.
891   //
892   // - If you want to parse Linux C++ exception handling data from a
893   //   .eh_frame section, you should pass EH_FRAME as true, and call
894   //   READER's Set*Base member functions before calling our Start method.
895   //
896   // - If you want to parse DWARF CFI that uses the 'z' augmentations
897   //   (although I don't think any toolchain ever emits such data), you
898   //   could pass false for EH_FRAME, but call READER's Set*Base members.
899   //
900   // The extensions the Linux C++ ABI makes to DWARF for exception
901   // handling are described here, rather poorly:
902   // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
903   // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
904   //
905   // The mechanics of C++ exception handling, personality routines,
906   // and language-specific data areas are described here, rather nicely:
907   // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
908   CallFrameInfo(const uint8_t *buffer, size_t buffer_length,
909                 ByteReader *reader, Handler *handler, Reporter *reporter,
910                 bool eh_frame = false)
buffer_(buffer)911       : buffer_(buffer), buffer_length_(buffer_length),
912         reader_(reader), handler_(handler), reporter_(reporter),
913         eh_frame_(eh_frame) { }
914 
~CallFrameInfo()915   ~CallFrameInfo() { }  // Empty body: nothing is released here.
916 
917   // Parse the entries in BUFFER, reporting what we find to HANDLER.
918   // Return true if we reach the end of the section successfully, or
919   // false if we encounter an error.
920   bool Start();
921 
922   // Return the textual name of KIND. For error reporting.
923   static const char *KindName(EntryKind kind);
924 
925  private:
926 
927   struct CIE;  // Defined below, after Entry, which holds a CIE pointer.
928 
929   // A CFI entry, either an FDE or a CIE.
930   struct Entry {
931     // The starting offset of the entry in the section, for error
932     // reporting.
933     size_t offset;
934 
935     // The start of this entry in the buffer.
936     const uint8_t *start;
937 
938     // Which kind of entry this is.
939     //
940     // We want to be able to use this for error reporting even while we're
941     // in the midst of parsing. Error reporting code may assume that kind,
942     // offset, and start fields are valid, although kind may be kUnknown.
943     EntryKind kind;
944 
945     // The end of this entry's common prologue (initial length and id), and
946     // the start of this entry's kind-specific fields.
947     const uint8_t *fields;
948 
949     // The start of this entry's instructions.
950     const uint8_t *instructions;
951 
952     // The address past the entry's last byte in the buffer. (Note that
953     // since offset points to the entry's initial length field, and the
954     // length field is the number of bytes after that field, this is not
955     // simply buffer_ + offset + length.)
956     const uint8_t *end;
957 
958     // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
959     // CIE, and the offset of the associated CIE in an FDE.
960     uint64 id;
961 
962     // The CIE that applies to this entry, if we've parsed it. If this is a
963     // CIE, then this field points to this structure.
964     CIE *cie;
965   };
966 
967   // A common information entry (CIE).
968   struct CIE: public Entry {
969     uint8 version;                      // CFI data version number
970     string augmentation;                // vendor format extension markers
971     uint64 code_alignment_factor;       // scale for code address adjustments
972     int data_alignment_factor;          // scale for stack pointer adjustments
973     unsigned return_address_register;   // which register holds the return addr
974 
975     // True if this CIE includes Linux C++ ABI 'z' augmentation data.
976     bool has_z_augmentation;
977 
978     // Parsed 'z' augmentation data. These are meaningful only if
979     // has_z_augmentation is true.
980     bool has_z_lsda;                    // The 'z' augmentation included 'L'.
981     bool has_z_personality;             // The 'z' augmentation included 'P'.
982     bool has_z_signal_frame;            // The 'z' augmentation included 'S'.
983 
984     // If has_z_lsda is true, this is the encoding to be used for language-
985     // specific data area pointers in FDEs.
986     DwarfPointerEncoding lsda_encoding;
987 
988     // If has_z_personality is true, this is the encoding used for the
989     // personality routine pointer in the augmentation data.
990     DwarfPointerEncoding personality_encoding;
991 
992     // If has_z_personality is true, this is the address of the personality
993     // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
994     // address where the personality routine's address is stored.
995     uint64 personality_address;
996 
997     // This is the encoding used for addresses in the FDE header and
998     // in DW_CFA_set_loc instructions. This is always valid, whether
999     // or not we saw a 'z' augmentation string; its default value is
1000     // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
1001     DwarfPointerEncoding pointer_encoding;
1002 
1003     // These were only introduced in DWARF4, so will not be set in older
1004     // versions.
1005     uint8 address_size;
1006     uint8 segment_size;
1007   };
1008 
1009   // A frame description entry (FDE).
1010   struct FDE: public Entry {
1011     uint64 address;                     // start address of described code
1012     uint64 size;                        // size of described code, in bytes
1013 
1014     // If cie->has_z_lsda is true, then this is the language-specific data
1015     // area's address --- or its address's address, if cie->lsda_encoding
1016     // has the DW_EH_PE_indirect bit set.
1017     uint64 lsda_address;
1018   };
1019 
1020   // Internal use: forward declarations only.
1021   class Rule;
1022   class UndefinedRule;
1023   class SameValueRule;
1024   class OffsetRule;
1025   class ValOffsetRule;
1026   class RegisterRule;
1027   class ExpressionRule;
1028   class ValExpressionRule;
1029   class RuleMap;
1030   class State;
1031 
1032   // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
1033   // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
1034   // data to parse. On success, populate ENTRY as appropriate, and return
1035   // true. On failure, report the problem, and return false. Even if we
1036   // return false, set ENTRY->end to the first byte after the entry if we
1037   // were able to figure that out, or NULL if we weren't.
1038   bool ReadEntryPrologue(const uint8_t *cursor, Entry *entry);
1039 
1040   // Parse the fields of a CIE after the entry prologue, including any 'z'
1041   // augmentation data. Assume that the 'Entry' fields of CIE are
1042   // populated; use CIE->fields and CIE->end as the start and limit for
1043   // parsing. On success, populate the rest of *CIE, and return true; on
1044   // failure, report the problem and return false.
1045   bool ReadCIEFields(CIE *cie);
1046 
1047   // Parse the fields of an FDE after the entry prologue, including any 'z'
1048   // augmentation data. Assume that the 'Entry' fields of *FDE are
1049   // initialized; use FDE->fields and FDE->end as the start and limit for
1050   // parsing. Assume that FDE->cie is fully initialized. On success,
1051   // populate the rest of *FDE, and return true; on failure, report the
1052   // problem and return false.
1053   bool ReadFDEFields(FDE *fde);
1054 
1055   // Report that ENTRY is incomplete, and return false. This is just a
1056   // trivial wrapper for invoking reporter_->Incomplete; it provides a
1057   // little brevity.
1058   bool ReportIncomplete(Entry *entry);
1059 
1060   // Return true if ENCODING has the DW_EH_PE_indirect bit set.
IsIndirectEncoding(DwarfPointerEncoding encoding)1061   static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
1062     return encoding & DW_EH_PE_indirect;  // non-zero mask result => true
1063   }
1064 
1065   // The contents of the .debug_frame or .eh_frame section we're parsing.
1066   const uint8_t *buffer_;
1067   size_t buffer_length_;  // Length of buffer_, in bytes.
1068 
1069   // For reading multi-byte values with the appropriate endianness.
1070   ByteReader *reader_;
1071 
1072   // The handler to which we should report the data we find.
1073   Handler *handler_;
1074 
1075   // For reporting problems in the info we're parsing.
1076   Reporter *reporter_;
1077 
1078   // True if we are processing .eh_frame-format data.
1079   bool eh_frame_;
1080 };
1081 
1082 // The handler class for CallFrameInfo.  The CFI parser calls the
1083 // member functions of a handler object to report the data it finds.
1084 class CallFrameInfo::Handler {
1085  public:
1086   // The pseudo-register number for the canonical frame address.
1087   enum { kCFARegister = -1 };
1088 
Handler()1089   Handler() { }
~Handler()1090   virtual ~Handler() { }
1091 
1092   // The parser has found CFI for the machine code at ADDRESS,
1093   // extending for LENGTH bytes. OFFSET is the offset of the frame
1094   // description entry in the section, for use in error messages.
1095   // VERSION is the version number of the CFI format. AUGMENTATION is
1096   // a string describing any producer-specific extensions present in
1097   // the data. RETURN_ADDRESS is the number of the register that holds
1098   // the address to which the function should return.
1099   //
1100   // Entry should return true to process this CFI, or false to skip to
1101   // the next entry.
1102   //
1103   // The parser invokes Entry for each Frame Description Entry (FDE)
1104   // it finds.  The parser doesn't report Common Information Entries
1105   // to the handler explicitly; instead, if the handler elects to
1106   // process a given FDE, the parser reiterates the appropriate CIE's
1107   // contents at the beginning of the FDE's rules.
1108   virtual bool Entry(size_t offset, uint64 address, uint64 length,
1109                      uint8 version, const string &augmentation,
1110                      unsigned return_address) = 0;
1111 
1112   // When the Entry function returns true, the parser calls these
1113   // handler functions repeatedly to describe the rules for recovering
1114   // registers at each instruction in the given range of machine code.
1115   // Immediately after a call to Entry, the handler should assume that
1116   // the rule for each callee-saves register is "unchanged" --- that
1117   // is, that the register still has the value it had in the caller.
1118   //
1119   // If a *Rule function returns true, we continue processing this entry's
1120   // instructions. If a *Rule function returns false, we stop evaluating
1121   // instructions, and skip to the next entry. Either way, we call End
1122   // before going on to the next entry.
1123   //
1124   // In all of these functions, if the REG parameter is kCFARegister, then
1125   // the rule describes how to find the canonical frame address.
1126   // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
1127   // the canonical frame address should be used as the base address for the
1128   // computation. All other REG values will be non-negative.
1129 
1130   // At ADDRESS, register REG's value is not recoverable.
1131   virtual bool UndefinedRule(uint64 address, int reg) = 0;
1132 
1133   // At ADDRESS, register REG's value is the same as that it had in
1134   // the caller.
1135   virtual bool SameValueRule(uint64 address, int reg) = 0;
1136 
1137   // At ADDRESS, register REG has been saved at offset OFFSET from
1138   // BASE_REGISTER.
1139   virtual bool OffsetRule(uint64 address, int reg,
1140                           int base_register, long offset) = 0;
1141 
1142   // At ADDRESS, the caller's value of register REG is the current
1143   // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
1144   // address at which the register's value is saved.)
1145   virtual bool ValOffsetRule(uint64 address, int reg,
1146                              int base_register, long offset) = 0;
1147 
1148   // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
1149   // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
1150   // BASE_REGISTER is the "home" for REG's saved value: if you want to
1151   // assign to a variable whose home is REG in the calling frame, you
1152   // should put the value in BASE_REGISTER.
1153   virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0;
1154 
1155   // At ADDRESS, the DWARF expression EXPRESSION yields the address at
1156   // which REG was saved.
1157   virtual bool ExpressionRule(uint64 address, int reg,
1158                               const string &expression) = 0;
1159 
1160   // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
1161   // value for REG. (This rule doesn't provide an address at which the
1162   // register's value is saved.)
1163   virtual bool ValExpressionRule(uint64 address, int reg,
1164                                  const string &expression) = 0;
1165 
1166   // Indicate that the rules for the address range reported by the
1167   // last call to Entry are complete.  End should return true if
1168   // everything is okay, or false if an error has occurred and parsing
1169   // should stop.
1170   virtual bool End() = 0;
1171 
1172   // Handler functions for Linux C++ exception handling data. These are
1173   // only called if the data includes 'z' augmentation strings.
1174 
1175   // The Linux C++ ABI uses an extension of the DWARF CFI format to
1176   // walk the stack to propagate exceptions from the throw to the
1177   // appropriate catch, and do the appropriate cleanups along the way.
1178   // CFI entries used for exception handling have two additional pieces
1179   // of data associated with them:
1180   //
1181   // - The "language-specific data area" describes which exception
1182   //   types the function has 'catch' clauses for, and indicates how
1183   //   to go about re-entering the function at the appropriate catch
1184   //   clause. If the exception is not caught, it describes the
1185   //   destructors that must run before the frame is popped.
1186   //
1187   // - The "personality routine" is responsible for interpreting the
1188   //   language-specific data area's contents, and deciding whether
1189   //   the exception should continue to propagate down the stack,
1190   //   perhaps after doing some cleanup for this frame, or whether the
1191   //   exception will be caught here.
1192   //
1193   // In principle, the language-specific data area is opaque to
1194   // everybody but the personality routine. In practice, these values
1195   // may be useful or interesting to readers with extra context, and
1196   // we have to at least skip them anyway, so we might as well report
1197   // them to the handler.
1198 
1199   // This entry's exception handling personality routine's address is
1200   // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
1201   // which the routine's address is stored. The default definition for
1202   // this handler function simply returns true, allowing parsing of
1203   // the entry to continue.
PersonalityRoutine(uint64 address,bool indirect)1204   virtual bool PersonalityRoutine(uint64 address, bool indirect) {
1205     return true;
1206   }
1207 
1208   // This entry's language-specific data area (LSDA) is located at
1209   // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
1210   // which the area's address is stored. The default definition for
1211   // this handler function simply returns true, allowing parsing of
1212   // the entry to continue.
LanguageSpecificDataArea(uint64 address,bool indirect)1213   virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) {
1214     return true;
1215   }
1216 
1217   // This entry describes a signal trampoline --- this frame is the
1218   // caller of a signal handler. The default definition for this
1219   // handler function simply returns true, allowing parsing of the
1220   // entry to continue.
1221   //
1222   // The best description of the rationale for and meaning of signal
1223   // trampoline CFI entries seems to be in the GCC bug database:
1224   // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
SignalHandler()1225   virtual bool SignalHandler() { return true; }
1226 };
1227 
1228 // The CallFrameInfo class makes calls on an instance of this class to
1229 // report errors or warn about problems in the data it is parsing. The
1230 // default definitions of these methods print a message to stderr, but
1231 // you can make a derived class that overrides them.
1232 class CallFrameInfo::Reporter {
1233  public:
1234   // Create an error reporter which attributes troubles to the section
1235   // named SECTION in FILENAME.
1236   //
1237   // Normally SECTION would be .debug_frame, but the Mac puts CFI data
1238   // in a Mach-O section named __debug_frame. If we support
1239   // Linux-style exception handling data, we could be reading an
1240   // .eh_frame section.
1241   Reporter(const string &filename,
1242            const string &section = ".debug_frame")
filename_(filename)1243       : filename_(filename), section_(section) { }
~Reporter()1244   virtual ~Reporter() { }
1245 
1246   // The CFI entry at OFFSET ends too early to be well-formed. KIND
1247   // indicates what kind of entry it is; KIND can be kUnknown if we
1248   // haven't parsed enough of the entry to tell yet.
1249   virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind);
1250 
1251   // The .eh_frame data has a four-byte zero at OFFSET where the next
1252   // entry's length would be; this is a terminator. However, the buffer
1253   // length as given to the CallFrameInfo constructor says there should be
1254   // more data.
1255   virtual void EarlyEHTerminator(uint64 offset);
1256 
1257   // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
1258   // section is not that large.
1259   virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset);
1260 
1261   // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
1262   // there is not a CIE.
1263   virtual void BadCIEId(uint64 offset, uint64 cie_offset);
1264 
1265   // The FDE at OFFSET refers to a CIE with an address size we don't know how
1266   // to handle.
1267   virtual void UnexpectedAddressSize(uint64 offset, uint8_t address_size);
1268 
1269   // The FDE at OFFSET refers to a CIE with a segment descriptor size we
1270   // don't know how to handle.
1271   virtual void UnexpectedSegmentSize(uint64 offset, uint8_t segment_size);
1272 
1273   // The FDE at OFFSET refers to a CIE with version number VERSION,
1274   // which we don't recognize. We cannot parse DWARF CFI if it uses
1275   // a version number we don't recognize.
1276   virtual void UnrecognizedVersion(uint64 offset, int version);
1277 
1278   // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
1279   // which we don't recognize. We cannot parse DWARF CFI if it uses
1280   // augmentations we don't recognize.
1281   virtual void UnrecognizedAugmentation(uint64 offset,
1282                                         const string &augmentation);
1283 
1284   // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
1285   // a valid encoding.
1286   virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding);
1287 
1288   // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
1289   // on a base address which has not been supplied.
1290   virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding);
1291 
1292   // The CIE at OFFSET contains a DW_CFA_restore instruction at
1293   // INSN_OFFSET, which may not appear in a CIE.
1294   virtual void RestoreInCIE(uint64 offset, uint64 insn_offset);
1295 
1296   // The entry at OFFSET, of kind KIND, has an unrecognized
1297   // instruction at INSN_OFFSET.
1298   virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind,
1299                               uint64 insn_offset);
1300 
1301   // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
1302   // KIND, establishes a rule that cites the CFA, but we have not
1303   // established a CFA rule yet.
1304   virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
1305                          uint64 insn_offset);
1306 
1307   // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
1308   // KIND, is a DW_CFA_restore_state instruction, but the stack of
1309   // saved states is empty.
1310   virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind,
1311                                uint64 insn_offset);
1312 
1313   // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry
1314   // at OFFSET, of kind KIND, would restore a state that has no CFA
1315   // rule, whereas the current state does have a CFA rule. This is
1316   // bogus input, which the CallFrameInfo::Handler interface doesn't
1317   // (and shouldn't) have any way to report.
1318   virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
1319                                uint64 insn_offset);
1320 
1321  protected:
1322   // The name of the file whose CFI we're reading.
1323   string filename_;
1324 
1325   // The name of the CFI section in that file.
1326   string section_;
1327 };
1328 
1329 }  // namespace dwarf2reader
1330 
1331 #endif  // UTIL_DEBUGINFO_DWARF2READER_H__
1332