// -*- mode: C++ -*-

// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
35 // The DWARF2/3 specification can be found at 36 // http://dwarf.freestandards.org and should be considered required 37 // reading if you wish to modify the implementation. 38 // Only a cursory attempt is made to explain terminology that is 39 // used here, as it is much better explained in the standard documents 40 #ifndef COMMON_DWARF_DWARF2READER_H__ 41 #define COMMON_DWARF_DWARF2READER_H__ 42 43 #include <stdint.h> 44 45 #include <list> 46 #include <map> 47 #include <string> 48 #include <utility> 49 #include <vector> 50 #include <memory> 51 52 #include "common/dwarf/bytereader.h" 53 #include "common/dwarf/dwarf2enums.h" 54 #include "common/dwarf/types.h" 55 #include "common/using_std_string.h" 56 #include "common/dwarf/elf_reader.h" 57 58 namespace dwarf2reader { 59 struct LineStateMachine; 60 class Dwarf2Handler; 61 class LineInfoHandler; 62 class DwpReader; 63 64 // This maps from a string naming a section to a pair containing a 65 // the data for the section, and the size of the section. 66 typedef std::map<string, std::pair<const uint8_t *, uint64> > SectionMap; 67 typedef std::list<std::pair<enum DwarfAttribute, enum DwarfForm> > 68 AttributeList; 69 typedef AttributeList::iterator AttributeIterator; 70 typedef AttributeList::const_iterator ConstAttributeIterator; 71 72 struct LineInfoHeader { 73 uint64 total_length; 74 uint16 version; 75 uint64 prologue_length; 76 uint8 min_insn_length; // insn stands for instructin 77 bool default_is_stmt; // stmt stands for statement 78 int8 line_base; 79 uint8 line_range; 80 uint8 opcode_base; 81 // Use a pointer so that signalsafe_addr2line is able to use this structure 82 // without heap allocation problem. 83 std::vector<unsigned char> *std_opcode_lengths; 84 }; 85 86 class LineInfo { 87 public: 88 89 // Initializes a .debug_line reader. Buffer and buffer length point 90 // to the beginning and length of the line information to read. 91 // Reader is a ByteReader class that has the endianness set 92 // properly. 
93 LineInfo(const uint8_t *buffer_, uint64 buffer_length, 94 ByteReader* reader, LineInfoHandler* handler); 95 ~LineInfo()96 virtual ~LineInfo() { 97 if (header_.std_opcode_lengths) { 98 delete header_.std_opcode_lengths; 99 } 100 } 101 102 // Start processing line info, and calling callbacks in the handler. 103 // Consumes the line number information for a single compilation unit. 104 // Returns the number of bytes processed. 105 uint64 Start(); 106 107 // Process a single line info opcode at START using the state 108 // machine at LSM. Return true if we should define a line using the 109 // current state of the line state machine. Place the length of the 110 // opcode in LEN. 111 // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm 112 // passes the address of PC. In other words, LSM_PASSES_PC will be 113 // set to true, if the following condition is met. 114 // 115 // lsm's old address < PC <= lsm's new address 116 static bool ProcessOneOpcode(ByteReader* reader, 117 LineInfoHandler* handler, 118 const struct LineInfoHeader &header, 119 const uint8_t *start, 120 struct LineStateMachine* lsm, 121 size_t* len, 122 uintptr pc, 123 bool *lsm_passes_pc); 124 125 private: 126 // Reads the DWARF2/3 header for this line info. 127 void ReadHeader(); 128 129 // Reads the DWARF2/3 line information 130 void ReadLines(); 131 132 // The associated handler to call processing functions in 133 LineInfoHandler* handler_; 134 135 // The associated ByteReader that handles endianness issues for us 136 ByteReader* reader_; 137 138 // A DWARF2/3 line info header. This is not the same size as 139 // in the actual file, as the one in the file may have a 32 bit or 140 // 64 bit lengths 141 142 struct LineInfoHeader header_; 143 144 // buffer is the buffer for our line info, starting at exactly where 145 // the line info to read is. after_header is the place right after 146 // the end of the line information header. 
147 const uint8_t *buffer_; 148 #ifndef NDEBUG 149 uint64 buffer_length_; 150 #endif 151 const uint8_t *after_header_; 152 }; 153 154 // This class is the main interface between the line info reader and 155 // the client. The virtual functions inside this get called for 156 // interesting events that happen during line info reading. The 157 // default implementation does nothing 158 159 class LineInfoHandler { 160 public: LineInfoHandler()161 LineInfoHandler() { } 162 ~LineInfoHandler()163 virtual ~LineInfoHandler() { } 164 165 // Called when we define a directory. NAME is the directory name, 166 // DIR_NUM is the directory number DefineDir(const string & name,uint32 dir_num)167 virtual void DefineDir(const string& name, uint32 dir_num) { } 168 169 // Called when we define a filename. NAME is the filename, FILE_NUM 170 // is the file number which is -1 if the file index is the next 171 // index after the last numbered index (this happens when files are 172 // dynamically defined by the line program), DIR_NUM is the 173 // directory index for the directory name of this file, MOD_TIME is 174 // the modification time of the file, and LENGTH is the length of 175 // the file DefineFile(const string & name,int32 file_num,uint32 dir_num,uint64 mod_time,uint64 length)176 virtual void DefineFile(const string& name, int32 file_num, 177 uint32 dir_num, uint64 mod_time, 178 uint64 length) { } 179 180 // Called when the line info reader has a new line, address pair 181 // ready for us. ADDRESS is the address of the code, LENGTH is the 182 // length of its machine code in bytes, FILE_NUM is the file number 183 // containing the code, LINE_NUM is the line number in that file for 184 // the code, and COLUMN_NUM is the column number the code starts at, 185 // if we know it (0 otherwise). 
AddLine(uint64 address,uint64 length,uint32 file_num,uint32 line_num,uint32 column_num)186 virtual void AddLine(uint64 address, uint64 length, 187 uint32 file_num, uint32 line_num, uint32 column_num) { } 188 }; 189 190 class RangeListHandler { 191 public: RangeListHandler()192 RangeListHandler() { } 193 ~RangeListHandler()194 virtual ~RangeListHandler() { } 195 196 // Add a range. AddRange(uint64 begin,uint64 end)197 virtual void AddRange(uint64 begin, uint64 end) { }; 198 199 // A new base address must be set for computing the ranges' addresses. SetBaseAddress(uint64 base_address)200 virtual void SetBaseAddress(uint64 base_address) { }; 201 202 // Finish processing the range list. Finish()203 virtual void Finish() { }; 204 }; 205 206 class RangeListReader { 207 public: 208 RangeListReader(const uint8_t *buffer, uint64 size, ByteReader *reader, 209 RangeListHandler *handler); 210 211 bool ReadRangeList(uint64 offset); 212 213 private: 214 const uint8_t *buffer_; 215 uint64 size_; 216 ByteReader* reader_; 217 RangeListHandler *handler_; 218 }; 219 220 // This class is the main interface between the reader and the 221 // client. The virtual functions inside this get called for 222 // interesting events that happen during DWARF2 reading. 223 // The default implementation skips everything. 224 class Dwarf2Handler { 225 public: Dwarf2Handler()226 Dwarf2Handler() { } 227 ~Dwarf2Handler()228 virtual ~Dwarf2Handler() { } 229 230 // Start to process a compilation unit at OFFSET from the beginning of the 231 // .debug_info section. Return false if you would like to skip this 232 // compilation unit. 
StartCompilationUnit(uint64 offset,uint8 address_size,uint8 offset_size,uint64 cu_length,uint8 dwarf_version)233 virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, 234 uint8 offset_size, uint64 cu_length, 235 uint8 dwarf_version) { return false; } 236 237 // When processing a skeleton compilation unit, resulting from a split 238 // DWARF compilation, once the skeleton debug info has been read, 239 // the reader will call this function to ask the client if it needs 240 // the full debug info from the .dwo or .dwp file. Return true if 241 // you need it, or false to skip processing the split debug info. NeedSplitDebugInfo()242 virtual bool NeedSplitDebugInfo() { return true; } 243 244 // Start to process a split compilation unit at OFFSET from the beginning of 245 // the debug_info section in the .dwp/.dwo file. Return false if you would 246 // like to skip this compilation unit. StartSplitCompilationUnit(uint64 offset,uint64 cu_length)247 virtual bool StartSplitCompilationUnit(uint64 offset, 248 uint64 cu_length) { return false; } 249 250 // Start to process a DIE at OFFSET from the beginning of the .debug_info 251 // section. Return false if you would like to skip this DIE. StartDIE(uint64 offset,enum DwarfTag tag)252 virtual bool StartDIE(uint64 offset, enum DwarfTag tag) { return false; } 253 254 // Called when we have an attribute with unsigned data to give to our 255 // handler. The attribute is for the DIE at OFFSET from the beginning of the 256 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 257 // DATA. ProcessAttributeUnsigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 data)258 virtual void ProcessAttributeUnsigned(uint64 offset, 259 enum DwarfAttribute attr, 260 enum DwarfForm form, 261 uint64 data) { } 262 263 // Called when we have an attribute with signed data to give to our handler. 264 // The attribute is for the DIE at OFFSET from the beginning of the 265 // .debug_info section. 
Its name is ATTR, its form is FORM, and its value is 266 // DATA. ProcessAttributeSigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,int64 data)267 virtual void ProcessAttributeSigned(uint64 offset, 268 enum DwarfAttribute attr, 269 enum DwarfForm form, 270 int64 data) { } 271 272 // Called when we have an attribute whose value is a reference to 273 // another DIE. The attribute belongs to the DIE at OFFSET from the 274 // beginning of the .debug_info section. Its name is ATTR, its form 275 // is FORM, and the offset of the DIE being referred to from the 276 // beginning of the .debug_info section is DATA. ProcessAttributeReference(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 data)277 virtual void ProcessAttributeReference(uint64 offset, 278 enum DwarfAttribute attr, 279 enum DwarfForm form, 280 uint64 data) { } 281 282 // Called when we have an attribute with a buffer of data to give to our 283 // handler. The attribute is for the DIE at OFFSET from the beginning of the 284 // .debug_info section. Its name is ATTR, its form is FORM, DATA points to 285 // the buffer's contents, and its length in bytes is LENGTH. The buffer is 286 // owned by the caller, not the callee, and may not persist for very long. 287 // If you want the data to be available later, it needs to be copied. ProcessAttributeBuffer(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64 len)288 virtual void ProcessAttributeBuffer(uint64 offset, 289 enum DwarfAttribute attr, 290 enum DwarfForm form, 291 const uint8_t *data, 292 uint64 len) { } 293 294 // Called when we have an attribute with string data to give to our handler. 295 // The attribute is for the DIE at OFFSET from the beginning of the 296 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 297 // DATA. 
ProcessAttributeString(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const string & data)298 virtual void ProcessAttributeString(uint64 offset, 299 enum DwarfAttribute attr, 300 enum DwarfForm form, 301 const string& data) { } 302 303 // Called when we have an attribute whose value is the 64-bit signature 304 // of a type unit in the .debug_types section. OFFSET is the offset of 305 // the DIE whose attribute we're reporting. ATTR and FORM are the 306 // attribute's name and form. SIGNATURE is the type unit's signature. ProcessAttributeSignature(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 signature)307 virtual void ProcessAttributeSignature(uint64 offset, 308 enum DwarfAttribute attr, 309 enum DwarfForm form, 310 uint64 signature) { } 311 312 // Called when finished processing the DIE at OFFSET. 313 // Because DWARF2/3 specifies a tree of DIEs, you may get starts 314 // before ends of the previous DIE, as we process children before 315 // ending the parent. EndDIE(uint64 offset)316 virtual void EndDIE(uint64 offset) { } 317 318 }; 319 320 // The base of DWARF2/3 debug info is a DIE (Debugging Information 321 // Entry. 322 // DWARF groups DIE's into a tree and calls the root of this tree a 323 // "compilation unit". Most of the time, there is one compilation 324 // unit in the .debug_info section for each file that had debug info 325 // generated. 326 // Each DIE consists of 327 328 // 1. a tag specifying a thing that is being described (ie 329 // DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc 330 // 2. attributes (such as DW_AT_location for location in memory, 331 // DW_AT_name for name), and data for each attribute. 332 // 3. A flag saying whether the DIE has children or not 333 334 // In order to gain some amount of compression, the format of 335 // each DIE (tag name, attributes and data forms for the attributes) 336 // are stored in a separate table called the "abbreviation table". 
337 // This is done because a large number of DIEs have the exact same tag 338 // and list of attributes, but different data for those attributes. 339 // As a result, the .debug_info section is just a stream of data, and 340 // requires reading of the .debug_abbrev section to say what the data 341 // means. 342 343 // As a warning to the user, it should be noted that the reason for 344 // using absolute offsets from the beginning of .debug_info is that 345 // DWARF2/3 supports referencing DIE's from other DIE's by their offset 346 // from either the current compilation unit start, *or* the beginning 347 // of the .debug_info section. This means it is possible to reference 348 // a DIE in one compilation unit from a DIE in another compilation 349 // unit. This style of reference is usually used to eliminate 350 // duplicated information that occurs across compilation 351 // units, such as base types, etc. GCC 3.4+ support this with 352 // -feliminate-dwarf2-dups. Other toolchains will sometimes do 353 // duplicate elimination in the linker. 354 355 class CompilationUnit { 356 public: 357 358 // Initialize a compilation unit. This requires a map of sections, 359 // the offset of this compilation unit in the .debug_info section, a 360 // ByteReader, and a Dwarf2Handler class to call callbacks in. 361 CompilationUnit(const string& path, const SectionMap& sections, uint64 offset, 362 ByteReader* reader, Dwarf2Handler* handler); ~CompilationUnit()363 virtual ~CompilationUnit() { 364 if (abbrevs_) delete abbrevs_; 365 } 366 367 // Initialize a compilation unit from a .dwo or .dwp file. 368 // In this case, we need the .debug_addr section from the 369 // executable file that contains the corresponding skeleton 370 // compilation unit. We also inherit the Dwarf2Handler from 371 // the executable file, and call it as if we were still 372 // processing the original compilation unit. 
373 void SetSplitDwarf(const uint8_t* addr_buffer, uint64 addr_buffer_length, 374 uint64 addr_base, uint64 ranges_base, uint64 dwo_id); 375 376 // Begin reading a Dwarf2 compilation unit, and calling the 377 // callbacks in the Dwarf2Handler 378 379 // Return the full length of the compilation unit, including 380 // headers. This plus the starting offset passed to the constructor 381 // is the offset of the end of the compilation unit --- and the 382 // start of the next compilation unit, if there is one. 383 uint64 Start(); 384 385 private: 386 387 // This struct represents a single DWARF2/3 abbreviation 388 // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a 389 // tag and a list of attributes, as well as the data form of each attribute. 390 struct Abbrev { 391 uint64 number; 392 enum DwarfTag tag; 393 bool has_children; 394 AttributeList attributes; 395 }; 396 397 // A DWARF2/3 compilation unit header. This is not the same size as 398 // in the actual file, as the one in the file may have a 32 bit or 399 // 64 bit length. 400 struct CompilationUnitHeader { 401 uint64 length; 402 uint16 version; 403 uint64 abbrev_offset; 404 uint8 address_size; 405 } header_; 406 407 // Reads the DWARF2/3 header for this compilation unit. 408 void ReadHeader(); 409 410 // Reads the DWARF2/3 abbreviations for this compilation unit 411 void ReadAbbrevs(); 412 413 // Processes a single DIE for this compilation unit and return a new 414 // pointer just past the end of it 415 const uint8_t *ProcessDIE(uint64 dieoffset, 416 const uint8_t *start, 417 const Abbrev& abbrev); 418 419 // Processes a single attribute and return a new pointer just past the 420 // end of it 421 const uint8_t *ProcessAttribute(uint64 dieoffset, 422 const uint8_t *start, 423 enum DwarfAttribute attr, 424 enum DwarfForm form); 425 426 // Called when we have an attribute with unsigned data to give to 427 // our handler. 
The attribute is for the DIE at OFFSET from the 428 // beginning of compilation unit, has a name of ATTR, a form of 429 // FORM, and the actual data of the attribute is in DATA. 430 // If we see a DW_AT_GNU_dwo_id attribute, save the value so that 431 // we can find the debug info in a .dwo or .dwp file. ProcessAttributeUnsigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 data)432 void ProcessAttributeUnsigned(uint64 offset, 433 enum DwarfAttribute attr, 434 enum DwarfForm form, 435 uint64 data) { 436 if (attr == DW_AT_GNU_dwo_id) { 437 dwo_id_ = data; 438 } 439 else if (attr == DW_AT_GNU_addr_base) { 440 addr_base_ = data; 441 } 442 else if (attr == DW_AT_GNU_ranges_base) { 443 ranges_base_ = data; 444 } 445 // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5, 446 // that base will apply to DW_AT_ranges attributes in the 447 // skeleton CU as well as in the .dwo/.dwp files. 448 else if (attr == DW_AT_ranges && is_split_dwarf_) { 449 data += ranges_base_; 450 } 451 handler_->ProcessAttributeUnsigned(offset, attr, form, data); 452 } 453 454 // Called when we have an attribute with signed data to give to 455 // our handler. The attribute is for the DIE at OFFSET from the 456 // beginning of compilation unit, has a name of ATTR, a form of 457 // FORM, and the actual data of the attribute is in DATA. ProcessAttributeSigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,int64 data)458 void ProcessAttributeSigned(uint64 offset, 459 enum DwarfAttribute attr, 460 enum DwarfForm form, 461 int64 data) { 462 handler_->ProcessAttributeSigned(offset, attr, form, data); 463 } 464 465 // Called when we have an attribute with a buffer of data to give to 466 // our handler. The attribute is for the DIE at OFFSET from the 467 // beginning of compilation unit, has a name of ATTR, a form of 468 // FORM, and the actual data of the attribute is in DATA, and the 469 // length of the buffer is LENGTH. 
ProcessAttributeBuffer(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64 len)470 void ProcessAttributeBuffer(uint64 offset, 471 enum DwarfAttribute attr, 472 enum DwarfForm form, 473 const uint8_t* data, 474 uint64 len) { 475 handler_->ProcessAttributeBuffer(offset, attr, form, data, len); 476 } 477 478 // Called when we have an attribute with string data to give to 479 // our handler. The attribute is for the DIE at OFFSET from the 480 // beginning of compilation unit, has a name of ATTR, a form of 481 // FORM, and the actual data of the attribute is in DATA. 482 // If we see a DW_AT_GNU_dwo_name attribute, save the value so 483 // that we can find the debug info in a .dwo or .dwp file. ProcessAttributeString(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const char * data)484 void ProcessAttributeString(uint64 offset, 485 enum DwarfAttribute attr, 486 enum DwarfForm form, 487 const char* data) { 488 if (attr == DW_AT_GNU_dwo_name) 489 dwo_name_ = data; 490 handler_->ProcessAttributeString(offset, attr, form, data); 491 } 492 493 // Processes all DIEs for this compilation unit 494 void ProcessDIEs(); 495 496 // Skips the die with attributes specified in ABBREV starting at 497 // START, and return the new place to position the stream to. 498 const uint8_t *SkipDIE(const uint8_t *start, const Abbrev& abbrev); 499 500 // Skips the attribute starting at START, with FORM, and return the 501 // new place to position the stream to. 502 const uint8_t *SkipAttribute(const uint8_t *start, enum DwarfForm form); 503 504 // Process the actual debug information in a split DWARF file. 505 void ProcessSplitDwarf(); 506 507 // Read the debug sections from a .dwo file. 508 void ReadDebugSectionsFromDwo(ElfReader* elf_reader, 509 SectionMap* sections); 510 511 // Path of the file containing the debug information. 
512 const string path_; 513 514 // Offset from section start is the offset of this compilation unit 515 // from the beginning of the .debug_info section. 516 uint64 offset_from_section_start_; 517 518 // buffer is the buffer for our CU, starting at .debug_info + offset 519 // passed in from constructor. 520 // after_header points to right after the compilation unit header. 521 const uint8_t *buffer_; 522 uint64 buffer_length_; 523 const uint8_t *after_header_; 524 525 // The associated ByteReader that handles endianness issues for us 526 ByteReader* reader_; 527 528 // The map of sections in our file to buffers containing their data 529 const SectionMap& sections_; 530 531 // The associated handler to call processing functions in 532 Dwarf2Handler* handler_; 533 534 // Set of DWARF2/3 abbreviations for this compilation unit. Indexed 535 // by abbreviation number, which means that abbrevs_[0] is not 536 // valid. 537 std::vector<Abbrev>* abbrevs_; 538 539 // String section buffer and length, if we have a string section. 540 // This is here to avoid doing a section lookup for strings in 541 // ProcessAttribute, which is in the hot path for DWARF2 reading. 542 const uint8_t *string_buffer_; 543 uint64 string_buffer_length_; 544 545 // String offsets section buffer and length, if we have a string offsets 546 // section (.debug_str_offsets or .debug_str_offsets.dwo). 547 const uint8_t* str_offsets_buffer_; 548 uint64 str_offsets_buffer_length_; 549 550 // Address section buffer and length, if we have an address section 551 // (.debug_addr). 552 const uint8_t* addr_buffer_; 553 uint64 addr_buffer_length_; 554 555 // Flag indicating whether this compilation unit is part of a .dwo 556 // or .dwp file. If true, we are reading this unit because a 557 // skeleton compilation unit in an executable file had a 558 // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. 
559 // In a .dwo file, we expect the string offsets section to 560 // have a ".dwo" suffix, and we will use the ".debug_addr" section 561 // associated with the skeleton compilation unit. 562 bool is_split_dwarf_; 563 564 // The value of the DW_AT_GNU_dwo_id attribute, if any. 565 uint64 dwo_id_; 566 567 // The value of the DW_AT_GNU_dwo_name attribute, if any. 568 const char* dwo_name_; 569 570 // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute 571 // from the skeleton CU. 572 uint64 skeleton_dwo_id_; 573 574 // The value of the DW_AT_GNU_ranges_base attribute, if any. 575 uint64 ranges_base_; 576 577 // The value of the DW_AT_GNU_addr_base attribute, if any. 578 uint64 addr_base_; 579 580 // True if we have already looked for a .dwp file. 581 bool have_checked_for_dwp_; 582 583 // Path to the .dwp file. 584 string dwp_path_; 585 586 // ByteReader for the DWP file. 587 std::unique_ptr<ByteReader> dwp_byte_reader_; 588 589 // DWP reader. 590 std::unique_ptr<DwpReader> dwp_reader_; 591 }; 592 593 // A Reader for a .dwp file. Supports the fetching of DWARF debug 594 // info for a given dwo_id. 595 // 596 // There are two versions of .dwp files. In both versions, the 597 // .dwp file is an ELF file containing only debug sections. 598 // In Version 1, the file contains many copies of each debug 599 // section, one for each .dwo file that is packaged in the .dwp 600 // file, and the .debug_cu_index section maps from the dwo_id 601 // to a set of section indexes. In Version 2, the file contains 602 // one of each debug section, and the .debug_cu_index section 603 // maps from the dwo_id to a set of offsets and lengths that 604 // identify each .dwo file's contribution to the larger sections. 605 606 class DwpReader { 607 public: 608 DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader); 609 610 ~DwpReader(); 611 612 // Read the CU index and initialize data members. 
613 void Initialize(); 614 615 // Read the debug sections for the given dwo_id. 616 void ReadDebugSectionsForCU(uint64 dwo_id, SectionMap* sections); 617 618 private: 619 // Search a v1 hash table for "dwo_id". Returns the slot index 620 // where the dwo_id was found, or -1 if it was not found. 621 int LookupCU(uint64 dwo_id); 622 623 // Search a v2 hash table for "dwo_id". Returns the row index 624 // in the offsets and sizes tables, or 0 if it was not found. 625 uint32 LookupCUv2(uint64 dwo_id); 626 627 // The ELF reader for the .dwp file. 628 ElfReader* elf_reader_; 629 630 // The ByteReader for the .dwp file. 631 const ByteReader& byte_reader_; 632 633 // Pointer to the .debug_cu_index section. 634 const char* cu_index_; 635 636 // Size of the .debug_cu_index section. 637 size_t cu_index_size_; 638 639 // Pointer to the .debug_str.dwo section. 640 const char* string_buffer_; 641 642 // Size of the .debug_str.dwo section. 643 size_t string_buffer_size_; 644 645 // Version of the .dwp file. We support versions 1 and 2 currently. 646 int version_; 647 648 // Number of columns in the section tables (version 2). 649 unsigned int ncolumns_; 650 651 // Number of units in the section tables (version 2). 652 unsigned int nunits_; 653 654 // Number of slots in the hash table. 655 unsigned int nslots_; 656 657 // Pointer to the beginning of the hash table. 658 const char* phash_; 659 660 // Pointer to the beginning of the index table. 661 const char* pindex_; 662 663 // Pointer to the beginning of the section index pool (version 1). 664 const char* shndx_pool_; 665 666 // Pointer to the beginning of the section offset table (version 2). 667 const char* offset_table_; 668 669 // Pointer to the beginning of the section size table (version 2). 670 const char* size_table_; 671 672 // Contents of the sections of interest (version 2). 
673 const char* abbrev_data_; 674 size_t abbrev_size_; 675 const char* info_data_; 676 size_t info_size_; 677 const char* str_offsets_data_; 678 size_t str_offsets_size_; 679 }; 680 681 // This class is a reader for DWARF's Call Frame Information. CFI 682 // describes how to unwind stack frames --- even for functions that do 683 // not follow fixed conventions for saving registers, whose frame size 684 // varies as they execute, etc. 685 // 686 // CFI describes, at each machine instruction, how to compute the 687 // stack frame's base address, how to find the return address, and 688 // where to find the saved values of the caller's registers (if the 689 // callee has stashed them somewhere to free up the registers for its 690 // own use). 691 // 692 // For example, suppose we have a function whose machine code looks 693 // like this (imagine an assembly language that looks like C, for a 694 // machine with 32-bit registers, and a stack that grows towards lower 695 // addresses): 696 // 697 // func: ; entry point; return address at sp 698 // func+0: sp = sp - 16 ; allocate space for stack frame 699 // func+1: sp[12] = r0 ; save r0 at sp+12 700 // ... ; other code, not frame-related 701 // func+10: sp -= 4; *sp = x ; push some x on the stack 702 // ... ; other code, not frame-related 703 // func+20: r0 = sp[16] ; restore saved r0 704 // func+21: sp += 20 ; pop whole stack frame 705 // func+22: pc = *sp; sp += 4 ; pop return address and jump to it 706 // 707 // DWARF CFI is (a very compressed representation of) a table with a 708 // row for each machine instruction address and a column for each 709 // register showing how to restore it, if possible. 710 // 711 // A special column named "CFA", for "Canonical Frame Address", tells how 712 // to compute the base address of the frame; registers' entries may 713 // refer to the CFA in describing where the registers are saved. 714 // 715 // Another special column, named "RA", represents the return address. 
716 // 717 // For example, here is a complete (uncompressed) table describing the 718 // function above: 719 // 720 // insn cfa r0 r1 ... ra 721 // ======================================= 722 // func+0: sp cfa[0] 723 // func+1: sp+16 cfa[0] 724 // func+2: sp+16 cfa[-4] cfa[0] 725 // func+11: sp+20 cfa[-4] cfa[0] 726 // func+21: sp+20 cfa[0] 727 // func+22: sp cfa[0] 728 // 729 // Some things to note here: 730 // 731 // - Each row describes the state of affairs *before* executing the 732 // instruction at the given address. Thus, the row for func+0 733 // describes the state before we allocate the stack frame. In the 734 // next row, the formula for computing the CFA has changed, 735 // reflecting that allocation. 736 // 737 // - The other entries are written in terms of the CFA; this allows 738 // them to remain unchanged as the stack pointer gets bumped around. 739 // For example, the rule for recovering the return address (the "ra" 740 // column) remains unchanged throughout the function, even as the 741 // stack pointer takes on three different offsets from the return 742 // address. 743 // 744 // - Although we haven't shown it, most calling conventions designate 745 // "callee-saves" and "caller-saves" registers. The callee must 746 // preserve the values of callee-saves registers; if it uses them, 747 // it must save their original values somewhere, and restore them 748 // before it returns. In contrast, the callee is free to trash 749 // caller-saves registers; if the callee uses these, it will 750 // probably not bother to save them anywhere, and the CFI will 751 // probably mark their values as "unrecoverable". 752 // 753 // (However, since the caller cannot assume the callee was going to 754 // save them, caller-saves registers are probably dead in the caller 755 // anyway, so compilers usually don't generate CFA for caller-saves 756 // registers.) 
//
// - Exactly where the CFA points is a matter of convention that
//   depends on the architecture and ABI in use. In the example, the
//   CFA is the value the stack pointer had upon entry to the
//   function, pointing at the saved return address. But on the x86,
//   the call frame information generated by GCC follows the
//   convention that the CFA is the address *after* the saved return
//   address.
//
//   But by definition, the CFA remains constant throughout the
//   lifetime of the frame. This makes it a useful value for other
//   columns to refer to. It also gives debuggers a useful handle
//   for identifying a frame.
//
// If you look at the table above, you'll notice that a given entry is
// often the same as the one immediately above it: most instructions
// change only one or two aspects of the stack frame, if they affect
// it at all. The DWARF format takes advantage of this fact, and
// reduces the size of the data by mentioning only the addresses and
// columns at which changes take place. So for the above, DWARF CFI
// data would only actually mention the following:
//
//     insn      cfa      r0      r1 ...  ra
//     =======================================
//     func+0:   sp                       cfa[0]
//     func+1:   sp+16
//     func+2:            cfa[-4]
//     func+11:  sp+20
//     func+21:           r0
//     func+22:  sp
//
// In fact, this is the way the parser reports CFI to the consumer: as
// a series of statements of the form, "At address X, column Y changed
// to Z," and related conventions for describing the initial state.
//
// Naturally, it would be impractical to have to scan the entire
// program's CFI, noting changes as we go, just to recover the
// unwinding rules in effect at one particular instruction.
To avoid 795 // this, CFI data is grouped into "entries", each of which covers a 796 // specified range of addresses and begins with a complete statement 797 // of the rules for all recoverable registers at that starting 798 // address. Each entry typically covers a single function. 799 // 800 // Thus, to compute the contents of a given row of the table --- that 801 // is, rules for recovering the CFA, RA, and registers at a given 802 // instruction --- the consumer should find the entry that covers that 803 // instruction's address, start with the initial state supplied at the 804 // beginning of the entry, and work forward until it has processed all 805 // the changes up to and including those for the present instruction. 806 // 807 // There are seven kinds of rules that can appear in an entry of the 808 // table: 809 // 810 // - "undefined": The given register is not preserved by the callee; 811 // its value cannot be recovered. 812 // 813 // - "same value": This register has the same value it did in the callee. 814 // 815 // - offset(N): The register is saved at offset N from the CFA. 816 // 817 // - val_offset(N): The value the register had in the caller is the 818 // CFA plus offset N. (This is usually only useful for describing 819 // the stack pointer.) 820 // 821 // - register(R): The register's value was saved in another register R. 822 // 823 // - expression(E): Evaluating the DWARF expression E using the 824 // current frame's registers' values yields the address at which the 825 // register was saved. 826 // 827 // - val_expression(E): Evaluating the DWARF expression E using the 828 // current frame's registers' values yields the value the register 829 // had in the caller. 830 831 class CallFrameInfo { 832 public: 833 // The different kinds of entries one finds in CFI. Used internally, 834 // and for error reporting. 
835 enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; 836 837 // The handler class to which the parser hands the parsed call frame 838 // information. Defined below. 839 class Handler; 840 841 // A reporter class, which CallFrameInfo uses to report errors 842 // encountered while parsing call frame information. Defined below. 843 class Reporter; 844 845 // Create a DWARF CFI parser. BUFFER points to the contents of the 846 // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. 847 // REPORTER is an error reporter the parser should use to report 848 // problems. READER is a ByteReader instance that has the endianness and 849 // address size set properly. Report the data we find to HANDLER. 850 // 851 // This class can also parse Linux C++ exception handling data, as found 852 // in '.eh_frame' sections. This data is a variant of DWARF CFI that is 853 // placed in loadable segments so that it is present in the program's 854 // address space, and is interpreted by the C++ runtime to search the 855 // call stack for a handler interested in the exception being thrown, 856 // actually pop the frames, and find cleanup code to run. 857 // 858 // There are two differences between the call frame information described 859 // in the DWARF standard and the exception handling data Linux places in 860 // the .eh_frame section: 861 // 862 // - Exception handling data uses uses a different format for call frame 863 // information entry headers. The distinguished CIE id, the way FDEs 864 // refer to their CIEs, and the way the end of the series of entries is 865 // determined are all slightly different. 866 // 867 // If the constructor's EH_FRAME argument is true, then the 868 // CallFrameInfo parses the entry headers as Linux C++ exception 869 // handling data. If EH_FRAME is false or omitted, the CallFrameInfo 870 // parses standard DWARF call frame information. 
871 // 872 // - Linux C++ exception handling data uses CIE augmentation strings 873 // beginning with 'z' to specify the presence of additional data after 874 // the CIE and FDE headers and special encodings used for addresses in 875 // frame description entries. 876 // 877 // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or 878 // exception handling data if you have supplied READER with the base 879 // addresses needed to interpret the pointer encodings that 'z' 880 // augmentations can specify. See the ByteReader interface for details 881 // about the base addresses. See the CallFrameInfo::Handler interface 882 // for details about the additional information one might find in 883 // 'z'-augmented data. 884 // 885 // Thus: 886 // 887 // - If you are parsing standard DWARF CFI, as found in a .debug_frame 888 // section, you should pass false for the EH_FRAME argument, or omit 889 // it, and you need not worry about providing READER with the 890 // additional base addresses. 891 // 892 // - If you want to parse Linux C++ exception handling data from a 893 // .eh_frame section, you should pass EH_FRAME as true, and call 894 // READER's Set*Base member functions before calling our Start method. 895 // 896 // - If you want to parse DWARF CFI that uses the 'z' augmentations 897 // (although I don't think any toolchain ever emits such data), you 898 // could pass false for EH_FRAME, but call READER's Set*Base members. 
899 // 900 // The extensions the Linux C++ ABI makes to DWARF for exception 901 // handling are described here, rather poorly: 902 // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html 903 // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html 904 // 905 // The mechanics of C++ exception handling, personality routines, 906 // and language-specific data areas are described here, rather nicely: 907 // http://www.codesourcery.com/public/cxx-abi/abi-eh.html 908 CallFrameInfo(const uint8_t *buffer, size_t buffer_length, 909 ByteReader *reader, Handler *handler, Reporter *reporter, 910 bool eh_frame = false) buffer_(buffer)911 : buffer_(buffer), buffer_length_(buffer_length), 912 reader_(reader), handler_(handler), reporter_(reporter), 913 eh_frame_(eh_frame) { } 914 ~CallFrameInfo()915 ~CallFrameInfo() { } 916 917 // Parse the entries in BUFFER, reporting what we find to HANDLER. 918 // Return true if we reach the end of the section successfully, or 919 // false if we encounter an error. 920 bool Start(); 921 922 // Return the textual name of KIND. For error reporting. 923 static const char *KindName(EntryKind kind); 924 925 private: 926 927 struct CIE; 928 929 // A CFI entry, either an FDE or a CIE. 930 struct Entry { 931 // The starting offset of the entry in the section, for error 932 // reporting. 933 size_t offset; 934 935 // The start of this entry in the buffer. 936 const uint8_t *start; 937 938 // Which kind of entry this is. 939 // 940 // We want to be able to use this for error reporting even while we're 941 // in the midst of parsing. Error reporting code may assume that kind, 942 // offset, and start fields are valid, although kind may be kUnknown. 943 EntryKind kind; 944 945 // The end of this entry's common prologue (initial length and id), and 946 // the start of this entry's kind-specific fields. 
947 const uint8_t *fields; 948 949 // The start of this entry's instructions. 950 const uint8_t *instructions; 951 952 // The address past the entry's last byte in the buffer. (Note that 953 // since offset points to the entry's initial length field, and the 954 // length field is the number of bytes after that field, this is not 955 // simply buffer_ + offset + length.) 956 const uint8_t *end; 957 958 // For both DWARF CFI and .eh_frame sections, this is the CIE id in a 959 // CIE, and the offset of the associated CIE in an FDE. 960 uint64 id; 961 962 // The CIE that applies to this entry, if we've parsed it. If this is a 963 // CIE, then this field points to this structure. 964 CIE *cie; 965 }; 966 967 // A common information entry (CIE). 968 struct CIE: public Entry { 969 uint8 version; // CFI data version number 970 string augmentation; // vendor format extension markers 971 uint64 code_alignment_factor; // scale for code address adjustments 972 int data_alignment_factor; // scale for stack pointer adjustments 973 unsigned return_address_register; // which register holds the return addr 974 975 // True if this CIE includes Linux C++ ABI 'z' augmentation data. 976 bool has_z_augmentation; 977 978 // Parsed 'z' augmentation data. These are meaningful only if 979 // has_z_augmentation is true. 980 bool has_z_lsda; // The 'z' augmentation included 'L'. 981 bool has_z_personality; // The 'z' augmentation included 'P'. 982 bool has_z_signal_frame; // The 'z' augmentation included 'S'. 983 984 // If has_z_lsda is true, this is the encoding to be used for language- 985 // specific data area pointers in FDEs. 986 DwarfPointerEncoding lsda_encoding; 987 988 // If has_z_personality is true, this is the encoding used for the 989 // personality routine pointer in the augmentation data. 
990 DwarfPointerEncoding personality_encoding; 991 992 // If has_z_personality is true, this is the address of the personality 993 // routine --- or, if personality_encoding & DW_EH_PE_indirect, the 994 // address where the personality routine's address is stored. 995 uint64 personality_address; 996 997 // This is the encoding used for addresses in the FDE header and 998 // in DW_CFA_set_loc instructions. This is always valid, whether 999 // or not we saw a 'z' augmentation string; its default value is 1000 // DW_EH_PE_absptr, which is what normal DWARF CFI uses. 1001 DwarfPointerEncoding pointer_encoding; 1002 1003 // These were only introduced in DWARF4, so will not be set in older 1004 // versions. 1005 uint8 address_size; 1006 uint8 segment_size; 1007 }; 1008 1009 // A frame description entry (FDE). 1010 struct FDE: public Entry { 1011 uint64 address; // start address of described code 1012 uint64 size; // size of described code, in bytes 1013 1014 // If cie->has_z_lsda is true, then this is the language-specific data 1015 // area's address --- or its address's address, if cie->lsda_encoding 1016 // has the DW_EH_PE_indirect bit set. 1017 uint64 lsda_address; 1018 }; 1019 1020 // Internal use. 1021 class Rule; 1022 class UndefinedRule; 1023 class SameValueRule; 1024 class OffsetRule; 1025 class ValOffsetRule; 1026 class RegisterRule; 1027 class ExpressionRule; 1028 class ValExpressionRule; 1029 class RuleMap; 1030 class State; 1031 1032 // Parse the initial length and id of a CFI entry, either a CIE, an FDE, 1033 // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the 1034 // data to parse. On success, populate ENTRY as appropriate, and return 1035 // true. On failure, report the problem, and return false. Even if we 1036 // return false, set ENTRY->end to the first byte after the entry if we 1037 // were able to figure that out, or NULL if we weren't. 
1038 bool ReadEntryPrologue(const uint8_t *cursor, Entry *entry); 1039 1040 // Parse the fields of a CIE after the entry prologue, including any 'z' 1041 // augmentation data. Assume that the 'Entry' fields of CIE are 1042 // populated; use CIE->fields and CIE->end as the start and limit for 1043 // parsing. On success, populate the rest of *CIE, and return true; on 1044 // failure, report the problem and return false. 1045 bool ReadCIEFields(CIE *cie); 1046 1047 // Parse the fields of an FDE after the entry prologue, including any 'z' 1048 // augmentation data. Assume that the 'Entry' fields of *FDE are 1049 // initialized; use FDE->fields and FDE->end as the start and limit for 1050 // parsing. Assume that FDE->cie is fully initialized. On success, 1051 // populate the rest of *FDE, and return true; on failure, report the 1052 // problem and return false. 1053 bool ReadFDEFields(FDE *fde); 1054 1055 // Report that ENTRY is incomplete, and return false. This is just a 1056 // trivial wrapper for invoking reporter_->Incomplete; it provides a 1057 // little brevity. 1058 bool ReportIncomplete(Entry *entry); 1059 1060 // Return true if ENCODING has the DW_EH_PE_indirect bit set. IsIndirectEncoding(DwarfPointerEncoding encoding)1061 static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { 1062 return encoding & DW_EH_PE_indirect; 1063 } 1064 1065 // The contents of the DWARF .debug_info section we're parsing. 1066 const uint8_t *buffer_; 1067 size_t buffer_length_; 1068 1069 // For reading multi-byte values with the appropriate endianness. 1070 ByteReader *reader_; 1071 1072 // The handler to which we should report the data we find. 1073 Handler *handler_; 1074 1075 // For reporting problems in the info we're parsing. 1076 Reporter *reporter_; 1077 1078 // True if we are processing .eh_frame-format data. 1079 bool eh_frame_; 1080 }; 1081 1082 // The handler class for CallFrameInfo. 
The a CFI parser calls the 1083 // member functions of a handler object to report the data it finds. 1084 class CallFrameInfo::Handler { 1085 public: 1086 // The pseudo-register number for the canonical frame address. 1087 enum { kCFARegister = -1 }; 1088 Handler()1089 Handler() { } ~Handler()1090 virtual ~Handler() { } 1091 1092 // The parser has found CFI for the machine code at ADDRESS, 1093 // extending for LENGTH bytes. OFFSET is the offset of the frame 1094 // description entry in the section, for use in error messages. 1095 // VERSION is the version number of the CFI format. AUGMENTATION is 1096 // a string describing any producer-specific extensions present in 1097 // the data. RETURN_ADDRESS is the number of the register that holds 1098 // the address to which the function should return. 1099 // 1100 // Entry should return true to process this CFI, or false to skip to 1101 // the next entry. 1102 // 1103 // The parser invokes Entry for each Frame Description Entry (FDE) 1104 // it finds. The parser doesn't report Common Information Entries 1105 // to the handler explicitly; instead, if the handler elects to 1106 // process a given FDE, the parser reiterates the appropriate CIE's 1107 // contents at the beginning of the FDE's rules. 1108 virtual bool Entry(size_t offset, uint64 address, uint64 length, 1109 uint8 version, const string &augmentation, 1110 unsigned return_address) = 0; 1111 1112 // When the Entry function returns true, the parser calls these 1113 // handler functions repeatedly to describe the rules for recovering 1114 // registers at each instruction in the given range of machine code. 1115 // Immediately after a call to Entry, the handler should assume that 1116 // the rule for each callee-saves register is "unchanged" --- that 1117 // is, that the register still has the value it had in the caller. 1118 // 1119 // If a *Rule function returns true, we continue processing this entry's 1120 // instructions. 
If a *Rule function returns false, we stop evaluating 1121 // instructions, and skip to the next entry. Either way, we call End 1122 // before going on to the next entry. 1123 // 1124 // In all of these functions, if the REG parameter is kCFARegister, then 1125 // the rule describes how to find the canonical frame address. 1126 // kCFARegister may be passed as a BASE_REGISTER argument, meaning that 1127 // the canonical frame address should be used as the base address for the 1128 // computation. All other REG values will be positive. 1129 1130 // At ADDRESS, register REG's value is not recoverable. 1131 virtual bool UndefinedRule(uint64 address, int reg) = 0; 1132 1133 // At ADDRESS, register REG's value is the same as that it had in 1134 // the caller. 1135 virtual bool SameValueRule(uint64 address, int reg) = 0; 1136 1137 // At ADDRESS, register REG has been saved at offset OFFSET from 1138 // BASE_REGISTER. 1139 virtual bool OffsetRule(uint64 address, int reg, 1140 int base_register, long offset) = 0; 1141 1142 // At ADDRESS, the caller's value of register REG is the current 1143 // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an 1144 // address at which the register's value is saved.) 1145 virtual bool ValOffsetRule(uint64 address, int reg, 1146 int base_register, long offset) = 0; 1147 1148 // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs 1149 // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that 1150 // BASE_REGISTER is the "home" for REG's saved value: if you want to 1151 // assign to a variable whose home is REG in the calling frame, you 1152 // should put the value in BASE_REGISTER. 1153 virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; 1154 1155 // At ADDRESS, the DWARF expression EXPRESSION yields the address at 1156 // which REG was saved. 
1157 virtual bool ExpressionRule(uint64 address, int reg, 1158 const string &expression) = 0; 1159 1160 // At ADDRESS, the DWARF expression EXPRESSION yields the caller's 1161 // value for REG. (This rule doesn't provide an address at which the 1162 // register's value is saved.) 1163 virtual bool ValExpressionRule(uint64 address, int reg, 1164 const string &expression) = 0; 1165 1166 // Indicate that the rules for the address range reported by the 1167 // last call to Entry are complete. End should return true if 1168 // everything is okay, or false if an error has occurred and parsing 1169 // should stop. 1170 virtual bool End() = 0; 1171 1172 // Handler functions for Linux C++ exception handling data. These are 1173 // only called if the data includes 'z' augmentation strings. 1174 1175 // The Linux C++ ABI uses an extension of the DWARF CFI format to 1176 // walk the stack to propagate exceptions from the throw to the 1177 // appropriate catch, and do the appropriate cleanups along the way. 1178 // CFI entries used for exception handling have two additional data 1179 // associated with them: 1180 // 1181 // - The "language-specific data area" describes which exception 1182 // types the function has 'catch' clauses for, and indicates how 1183 // to go about re-entering the function at the appropriate catch 1184 // clause. If the exception is not caught, it describes the 1185 // destructors that must run before the frame is popped. 1186 // 1187 // - The "personality routine" is responsible for interpreting the 1188 // language-specific data area's contents, and deciding whether 1189 // the exception should continue to propagate down the stack, 1190 // perhaps after doing some cleanup for this frame, or whether the 1191 // exception will be caught here. 1192 // 1193 // In principle, the language-specific data area is opaque to 1194 // everybody but the personality routine. 
In practice, these values 1195 // may be useful or interesting to readers with extra context, and 1196 // we have to at least skip them anyway, so we might as well report 1197 // them to the handler. 1198 1199 // This entry's exception handling personality routine's address is 1200 // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 1201 // which the routine's address is stored. The default definition for 1202 // this handler function simply returns true, allowing parsing of 1203 // the entry to continue. PersonalityRoutine(uint64 address,bool indirect)1204 virtual bool PersonalityRoutine(uint64 address, bool indirect) { 1205 return true; 1206 } 1207 1208 // This entry's language-specific data area (LSDA) is located at 1209 // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 1210 // which the area's address is stored. The default definition for 1211 // this handler function simply returns true, allowing parsing of 1212 // the entry to continue. LanguageSpecificDataArea(uint64 address,bool indirect)1213 virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { 1214 return true; 1215 } 1216 1217 // This entry describes a signal trampoline --- this frame is the 1218 // caller of a signal handler. The default definition for this 1219 // handler function simply returns true, allowing parsing of the 1220 // entry to continue. 1221 // 1222 // The best description of the rationale for and meaning of signal 1223 // trampoline CFI entries seems to be in the GCC bug database: 1224 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 SignalHandler()1225 virtual bool SignalHandler() { return true; } 1226 }; 1227 1228 // The CallFrameInfo class makes calls on an instance of this class to 1229 // report errors or warn about problems in the data it is parsing. The 1230 // default definitions of these methods print a message to stderr, but 1231 // you can make a derived class that overrides them. 
1232 class CallFrameInfo::Reporter { 1233 public: 1234 // Create an error reporter which attributes troubles to the section 1235 // named SECTION in FILENAME. 1236 // 1237 // Normally SECTION would be .debug_frame, but the Mac puts CFI data 1238 // in a Mach-O section named __debug_frame. If we support 1239 // Linux-style exception handling data, we could be reading an 1240 // .eh_frame section. 1241 Reporter(const string &filename, 1242 const string §ion = ".debug_frame") filename_(filename)1243 : filename_(filename), section_(section) { } ~Reporter()1244 virtual ~Reporter() { } 1245 1246 // The CFI entry at OFFSET ends too early to be well-formed. KIND 1247 // indicates what kind of entry it is; KIND can be kUnknown if we 1248 // haven't parsed enough of the entry to tell yet. 1249 virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); 1250 1251 // The .eh_frame data has a four-byte zero at OFFSET where the next 1252 // entry's length would be; this is a terminator. However, the buffer 1253 // length as given to the CallFrameInfo constructor says there should be 1254 // more data. 1255 virtual void EarlyEHTerminator(uint64 offset); 1256 1257 // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the 1258 // section is not that large. 1259 virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); 1260 1261 // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry 1262 // there is not a CIE. 1263 virtual void BadCIEId(uint64 offset, uint64 cie_offset); 1264 1265 // The FDE at OFFSET refers to a CIE with an address size we don't know how 1266 // to handle. 1267 virtual void UnexpectedAddressSize(uint64 offset, uint8_t address_size); 1268 1269 // The FDE at OFFSET refers to a CIE with an segment descriptor size we 1270 // don't know how to handle. 
1271 virtual void UnexpectedSegmentSize(uint64 offset, uint8_t segment_size); 1272 1273 // The FDE at OFFSET refers to a CIE with version number VERSION, 1274 // which we don't recognize. We cannot parse DWARF CFI if it uses 1275 // a version number we don't recognize. 1276 virtual void UnrecognizedVersion(uint64 offset, int version); 1277 1278 // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, 1279 // which we don't recognize. We cannot parse DWARF CFI if it uses 1280 // augmentations we don't recognize. 1281 virtual void UnrecognizedAugmentation(uint64 offset, 1282 const string &augmentation); 1283 1284 // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not 1285 // a valid encoding. 1286 virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); 1287 1288 // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends 1289 // on a base address which has not been supplied. 1290 virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); 1291 1292 // The CIE at OFFSET contains a DW_CFA_restore instruction at 1293 // INSN_OFFSET, which may not appear in a CIE. 1294 virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); 1295 1296 // The entry at OFFSET, of kind KIND, has an unrecognized 1297 // instruction at INSN_OFFSET. 1298 virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, 1299 uint64 insn_offset); 1300 1301 // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1302 // KIND, establishes a rule that cites the CFA, but we have not 1303 // established a CFA rule yet. 1304 virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, 1305 uint64 insn_offset); 1306 1307 // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1308 // KIND, is a DW_CFA_restore_state instruction, but the stack of 1309 // saved states is empty. 
1310 virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, 1311 uint64 insn_offset); 1312 1313 // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry 1314 // at OFFSET, of kind KIND, would restore a state that has no CFA 1315 // rule, whereas the current state does have a CFA rule. This is 1316 // bogus input, which the CallFrameInfo::Handler interface doesn't 1317 // (and shouldn't) have any way to report. 1318 virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, 1319 uint64 insn_offset); 1320 1321 protected: 1322 // The name of the file whose CFI we're reading. 1323 string filename_; 1324 1325 // The name of the CFI section in that file. 1326 string section_; 1327 }; 1328 1329 } // namespace dwarf2reader 1330 1331 #endif // UTIL_DEBUGINFO_DWARF2READER_H__ 1332