1 // Copyright (c) 2010 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30 
31 // Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
32 // and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
33 
34 #include "common/dwarf/dwarf2reader.h"
35 
36 #include <assert.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <string.h>
40 
41 #include <map>
42 #include <memory>
43 #include <stack>
44 #include <string>
45 #include <utility>
46 
47 #include <sys/stat.h>
48 
49 #include "common/dwarf/bytereader-inl.h"
50 #include "common/dwarf/bytereader.h"
51 #include "common/dwarf/line_state_machine.h"
52 #include "common/using_std_string.h"
53 
54 namespace dwarf2reader {
55 
CompilationUnit(const string & path,const SectionMap & sections,uint64 offset,ByteReader * reader,Dwarf2Handler * handler)56 CompilationUnit::CompilationUnit(const string& path,
57                                  const SectionMap& sections, uint64 offset,
58                                  ByteReader* reader, Dwarf2Handler* handler)
59     : path_(path), offset_from_section_start_(offset), reader_(reader),
60       sections_(sections), handler_(handler), abbrevs_(),
61       string_buffer_(NULL), string_buffer_length_(0),
62       str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
63       addr_buffer_(NULL), addr_buffer_length_(0),
64       is_split_dwarf_(false), dwo_id_(0), dwo_name_(),
65       skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
66       have_checked_for_dwp_(false), dwp_path_(),
67       dwp_byte_reader_(), dwp_reader_() {}
68 
69 // Initialize a compilation unit from a .dwo or .dwp file.
70 // In this case, we need the .debug_addr section from the
71 // executable file that contains the corresponding skeleton
72 // compilation unit.  We also inherit the Dwarf2Handler from
73 // the executable file, and call it as if we were still
74 // processing the original compilation unit.
75 
SetSplitDwarf(const uint8_t * addr_buffer,uint64 addr_buffer_length,uint64 addr_base,uint64 ranges_base,uint64 dwo_id)76 void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer,
77                                     uint64 addr_buffer_length,
78                                     uint64 addr_base,
79                                     uint64 ranges_base,
80                                     uint64 dwo_id) {
81   is_split_dwarf_ = true;
82   addr_buffer_ = addr_buffer;
83   addr_buffer_length_ = addr_buffer_length;
84   addr_base_ = addr_base;
85   ranges_base_ = ranges_base;
86   skeleton_dwo_id_ = dwo_id;
87 }
88 
89 // Read a DWARF2/3 abbreviation section.
90 // Each abbrev consists of a abbreviation number, a tag, a byte
91 // specifying whether the tag has children, and a list of
92 // attribute/form pairs.
93 // The list of forms is terminated by a 0 for the attribute, and a
94 // zero for the form.  The entire abbreviation section is terminated
95 // by a zero for the code.
96 
ReadAbbrevs()97 void CompilationUnit::ReadAbbrevs() {
98   if (abbrevs_)
99     return;
100 
101   // First get the debug_abbrev section.  ".debug_abbrev" is the name
102   // recommended in the DWARF spec, and used on Linux;
103   // "__debug_abbrev" is the name used in Mac OS X Mach-O files.
104   SectionMap::const_iterator iter = sections_.find(".debug_abbrev");
105   if (iter == sections_.end())
106     iter = sections_.find("__debug_abbrev");
107   assert(iter != sections_.end());
108 
109   abbrevs_ = new std::vector<Abbrev>;
110   abbrevs_->resize(1);
111 
112   // The only way to check whether we are reading over the end of the
113   // buffer would be to first compute the size of the leb128 data by
114   // reading it, then go back and read it again.
115   const uint8_t *abbrev_start = iter->second.first +
116                                       header_.abbrev_offset;
117   const uint8_t *abbrevptr = abbrev_start;
118 #ifndef NDEBUG
119   const uint64 abbrev_length = iter->second.second - header_.abbrev_offset;
120 #endif
121 
122   while (1) {
123     CompilationUnit::Abbrev abbrev;
124     size_t len;
125     const uint64 number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
126 
127     if (number == 0)
128       break;
129     abbrev.number = number;
130     abbrevptr += len;
131 
132     assert(abbrevptr < abbrev_start + abbrev_length);
133     const uint64 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
134     abbrevptr += len;
135     abbrev.tag = static_cast<enum DwarfTag>(tag);
136 
137     assert(abbrevptr < abbrev_start + abbrev_length);
138     abbrev.has_children = reader_->ReadOneByte(abbrevptr);
139     abbrevptr += 1;
140 
141     assert(abbrevptr < abbrev_start + abbrev_length);
142 
143     while (1) {
144       const uint64 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
145       abbrevptr += len;
146 
147       assert(abbrevptr < abbrev_start + abbrev_length);
148       const uint64 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
149       abbrevptr += len;
150       if (nametemp == 0 && formtemp == 0)
151         break;
152 
153       const enum DwarfAttribute name =
154         static_cast<enum DwarfAttribute>(nametemp);
155       const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp);
156       abbrev.attributes.push_back(std::make_pair(name, form));
157     }
158     assert(abbrev.number == abbrevs_->size());
159     abbrevs_->push_back(abbrev);
160   }
161 }
162 
163 // Skips a single DIE's attributes.
SkipDIE(const uint8_t * start,const Abbrev & abbrev)164 const uint8_t *CompilationUnit::SkipDIE(const uint8_t* start,
165                                         const Abbrev& abbrev) {
166   for (AttributeList::const_iterator i = abbrev.attributes.begin();
167        i != abbrev.attributes.end();
168        i++)  {
169     start = SkipAttribute(start, i->second);
170   }
171   return start;
172 }
173 
174 // Skips a single attribute form's data.
SkipAttribute(const uint8_t * start,enum DwarfForm form)175 const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start,
176                                               enum DwarfForm form) {
177   size_t len;
178 
179   switch (form) {
180     case DW_FORM_indirect:
181       form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
182                                                                      &len));
183       start += len;
184       return SkipAttribute(start, form);
185 
186     case DW_FORM_flag_present:
187       return start;
188     case DW_FORM_data1:
189     case DW_FORM_flag:
190     case DW_FORM_ref1:
191       return start + 1;
192     case DW_FORM_ref2:
193     case DW_FORM_data2:
194       return start + 2;
195     case DW_FORM_ref4:
196     case DW_FORM_data4:
197       return start + 4;
198     case DW_FORM_ref8:
199     case DW_FORM_data8:
200     case DW_FORM_ref_sig8:
201       return start + 8;
202     case DW_FORM_string:
203       return start + strlen(reinterpret_cast<const char *>(start)) + 1;
204     case DW_FORM_udata:
205     case DW_FORM_ref_udata:
206     case DW_FORM_GNU_str_index:
207     case DW_FORM_GNU_addr_index:
208       reader_->ReadUnsignedLEB128(start, &len);
209       return start + len;
210 
211     case DW_FORM_sdata:
212       reader_->ReadSignedLEB128(start, &len);
213       return start + len;
214     case DW_FORM_addr:
215       return start + reader_->AddressSize();
216     case DW_FORM_ref_addr:
217       // DWARF2 and 3/4 differ on whether ref_addr is address size or
218       // offset size.
219       assert(header_.version >= 2);
220       if (header_.version == 2) {
221         return start + reader_->AddressSize();
222       } else if (header_.version >= 3) {
223         return start + reader_->OffsetSize();
224       }
225       break;
226 
227     case DW_FORM_block1:
228       return start + 1 + reader_->ReadOneByte(start);
229     case DW_FORM_block2:
230       return start + 2 + reader_->ReadTwoBytes(start);
231     case DW_FORM_block4:
232       return start + 4 + reader_->ReadFourBytes(start);
233     case DW_FORM_block:
234     case DW_FORM_exprloc: {
235       uint64 size = reader_->ReadUnsignedLEB128(start, &len);
236       return start + size + len;
237     }
238     case DW_FORM_strp:
239     case DW_FORM_sec_offset:
240       return start + reader_->OffsetSize();
241   }
242   fprintf(stderr,"Unhandled form type");
243   return NULL;
244 }
245 
246 // Read a DWARF2/3 header.
247 // The header is variable length in DWARF3 (and DWARF2 as extended by
248 // most compilers), and consists of an length field, a version number,
249 // the offset in the .debug_abbrev section for our abbrevs, and an
250 // address size.
ReadHeader()251 void CompilationUnit::ReadHeader() {
252   const uint8_t *headerptr = buffer_;
253   size_t initial_length_size;
254 
255   assert(headerptr + 4 < buffer_ + buffer_length_);
256   const uint64 initial_length
257     = reader_->ReadInitialLength(headerptr, &initial_length_size);
258   headerptr += initial_length_size;
259   header_.length = initial_length;
260 
261   assert(headerptr + 2 < buffer_ + buffer_length_);
262   header_.version = reader_->ReadTwoBytes(headerptr);
263   headerptr += 2;
264 
265   assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
266   header_.abbrev_offset = reader_->ReadOffset(headerptr);
267   headerptr += reader_->OffsetSize();
268 
269   // Compare against less than or equal because this may be the last
270   // section in the file.
271   assert(headerptr + 1 <= buffer_ + buffer_length_);
272   header_.address_size = reader_->ReadOneByte(headerptr);
273   reader_->SetAddressSize(header_.address_size);
274   headerptr += 1;
275 
276   after_header_ = headerptr;
277 
278   // This check ensures that we don't have to do checking during the
279   // reading of DIEs. header_.length does not include the size of the
280   // initial length.
281   assert(buffer_ + initial_length_size + header_.length <=
282         buffer_ + buffer_length_);
283 }
284 
Start()285 uint64 CompilationUnit::Start() {
286   // First get the debug_info section.  ".debug_info" is the name
287   // recommended in the DWARF spec, and used on Linux; "__debug_info"
288   // is the name used in Mac OS X Mach-O files.
289   SectionMap::const_iterator iter = sections_.find(".debug_info");
290   if (iter == sections_.end())
291     iter = sections_.find("__debug_info");
292   assert(iter != sections_.end());
293 
294   // Set up our buffer
295   buffer_ = iter->second.first + offset_from_section_start_;
296   buffer_length_ = iter->second.second - offset_from_section_start_;
297 
298   // Read the header
299   ReadHeader();
300 
301   // Figure out the real length from the end of the initial length to
302   // the end of the compilation unit, since that is the value we
303   // return.
304   uint64 ourlength = header_.length;
305   if (reader_->OffsetSize() == 8)
306     ourlength += 12;
307   else
308     ourlength += 4;
309 
310   // See if the user wants this compilation unit, and if not, just return.
311   if (!handler_->StartCompilationUnit(offset_from_section_start_,
312                                       reader_->AddressSize(),
313                                       reader_->OffsetSize(),
314                                       header_.length,
315                                       header_.version))
316     return ourlength;
317 
318   // Otherwise, continue by reading our abbreviation entries.
319   ReadAbbrevs();
320 
321   // Set the string section if we have one.  ".debug_str" is the name
322   // recommended in the DWARF spec, and used on Linux; "__debug_str"
323   // is the name used in Mac OS X Mach-O files.
324   iter = sections_.find(".debug_str");
325   if (iter == sections_.end())
326     iter = sections_.find("__debug_str");
327   if (iter != sections_.end()) {
328     string_buffer_ = iter->second.first;
329     string_buffer_length_ = iter->second.second;
330   }
331 
332   // Set the string offsets section if we have one.
333   iter = sections_.find(".debug_str_offsets");
334   if (iter != sections_.end()) {
335     str_offsets_buffer_ = iter->second.first;
336     str_offsets_buffer_length_ = iter->second.second;
337   }
338 
339   // Set the address section if we have one.
340   iter = sections_.find(".debug_addr");
341   if (iter != sections_.end()) {
342     addr_buffer_ = iter->second.first;
343     addr_buffer_length_ = iter->second.second;
344   }
345 
346   // Now that we have our abbreviations, start processing DIE's.
347   ProcessDIEs();
348 
349   // If this is a skeleton compilation unit generated with split DWARF,
350   // and the client needs the full debug info, we need to find the full
351   // compilation unit in a .dwo or .dwp file.
352   if (!is_split_dwarf_
353       && dwo_name_ != NULL
354       && handler_->NeedSplitDebugInfo())
355     ProcessSplitDwarf();
356 
357   return ourlength;
358 }
359 
360 // If one really wanted, you could merge SkipAttribute and
361 // ProcessAttribute
362 // This is all boring data manipulation and calling of the handler.
ProcessAttribute(uint64 dieoffset,const uint8_t * start,enum DwarfAttribute attr,enum DwarfForm form)363 const uint8_t *CompilationUnit::ProcessAttribute(
364     uint64 dieoffset, const uint8_t *start, enum DwarfAttribute attr,
365     enum DwarfForm form) {
366   size_t len;
367 
368   switch (form) {
369     // DW_FORM_indirect is never used because it is such a space
370     // waster.
371     case DW_FORM_indirect:
372       form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
373                                                                      &len));
374       start += len;
375       return ProcessAttribute(dieoffset, start, attr, form);
376 
377     case DW_FORM_flag_present:
378       ProcessAttributeUnsigned(dieoffset, attr, form, 1);
379       return start;
380     case DW_FORM_data1:
381     case DW_FORM_flag:
382       ProcessAttributeUnsigned(dieoffset, attr, form,
383                                reader_->ReadOneByte(start));
384       return start + 1;
385     case DW_FORM_data2:
386       ProcessAttributeUnsigned(dieoffset, attr, form,
387                                reader_->ReadTwoBytes(start));
388       return start + 2;
389     case DW_FORM_data4:
390       ProcessAttributeUnsigned(dieoffset, attr, form,
391                                reader_->ReadFourBytes(start));
392       return start + 4;
393     case DW_FORM_data8:
394       ProcessAttributeUnsigned(dieoffset, attr, form,
395                                reader_->ReadEightBytes(start));
396       return start + 8;
397     case DW_FORM_string: {
398       const char *str = reinterpret_cast<const char *>(start);
399       ProcessAttributeString(dieoffset, attr, form, str);
400       return start + strlen(str) + 1;
401     }
402     case DW_FORM_udata:
403       ProcessAttributeUnsigned(dieoffset, attr, form,
404                                reader_->ReadUnsignedLEB128(start, &len));
405       return start + len;
406 
407     case DW_FORM_sdata:
408       ProcessAttributeSigned(dieoffset, attr, form,
409                              reader_->ReadSignedLEB128(start, &len));
410       return start + len;
411     case DW_FORM_addr:
412       ProcessAttributeUnsigned(dieoffset, attr, form,
413                                reader_->ReadAddress(start));
414       return start + reader_->AddressSize();
415     case DW_FORM_sec_offset:
416       ProcessAttributeUnsigned(dieoffset, attr, form,
417                                reader_->ReadOffset(start));
418       return start + reader_->OffsetSize();
419 
420     case DW_FORM_ref1:
421       handler_->ProcessAttributeReference(dieoffset, attr, form,
422                                           reader_->ReadOneByte(start)
423                                           + offset_from_section_start_);
424       return start + 1;
425     case DW_FORM_ref2:
426       handler_->ProcessAttributeReference(dieoffset, attr, form,
427                                           reader_->ReadTwoBytes(start)
428                                           + offset_from_section_start_);
429       return start + 2;
430     case DW_FORM_ref4:
431       handler_->ProcessAttributeReference(dieoffset, attr, form,
432                                           reader_->ReadFourBytes(start)
433                                           + offset_from_section_start_);
434       return start + 4;
435     case DW_FORM_ref8:
436       handler_->ProcessAttributeReference(dieoffset, attr, form,
437                                           reader_->ReadEightBytes(start)
438                                           + offset_from_section_start_);
439       return start + 8;
440     case DW_FORM_ref_udata:
441       handler_->ProcessAttributeReference(dieoffset, attr, form,
442                                           reader_->ReadUnsignedLEB128(start,
443                                                                       &len)
444                                           + offset_from_section_start_);
445       return start + len;
446     case DW_FORM_ref_addr:
447       // DWARF2 and 3/4 differ on whether ref_addr is address size or
448       // offset size.
449       assert(header_.version >= 2);
450       if (header_.version == 2) {
451         handler_->ProcessAttributeReference(dieoffset, attr, form,
452                                             reader_->ReadAddress(start));
453         return start + reader_->AddressSize();
454       } else if (header_.version >= 3) {
455         handler_->ProcessAttributeReference(dieoffset, attr, form,
456                                             reader_->ReadOffset(start));
457         return start + reader_->OffsetSize();
458       }
459       break;
460     case DW_FORM_ref_sig8:
461       handler_->ProcessAttributeSignature(dieoffset, attr, form,
462                                           reader_->ReadEightBytes(start));
463       return start + 8;
464 
465     case DW_FORM_block1: {
466       uint64 datalen = reader_->ReadOneByte(start);
467       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
468                                        datalen);
469       return start + 1 + datalen;
470     }
471     case DW_FORM_block2: {
472       uint64 datalen = reader_->ReadTwoBytes(start);
473       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
474                                        datalen);
475       return start + 2 + datalen;
476     }
477     case DW_FORM_block4: {
478       uint64 datalen = reader_->ReadFourBytes(start);
479       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
480                                        datalen);
481       return start + 4 + datalen;
482     }
483     case DW_FORM_block:
484     case DW_FORM_exprloc: {
485       uint64 datalen = reader_->ReadUnsignedLEB128(start, &len);
486       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
487                                        datalen);
488       return start + datalen + len;
489     }
490     case DW_FORM_strp: {
491       assert(string_buffer_ != NULL);
492 
493       const uint64 offset = reader_->ReadOffset(start);
494       assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
495 
496       const char *str = reinterpret_cast<const char *>(string_buffer_ + offset);
497       ProcessAttributeString(dieoffset, attr, form, str);
498       return start + reader_->OffsetSize();
499     }
500 
501     case DW_FORM_GNU_str_index: {
502       uint64 str_index = reader_->ReadUnsignedLEB128(start, &len);
503       const uint8_t* offset_ptr =
504           str_offsets_buffer_ + str_index * reader_->OffsetSize();
505       const uint64 offset = reader_->ReadOffset(offset_ptr);
506       if (offset >= string_buffer_length_) {
507         return NULL;
508       }
509 
510       const char* str = reinterpret_cast<const char *>(string_buffer_) + offset;
511       ProcessAttributeString(dieoffset, attr, form, str);
512       return start + len;
513       break;
514     }
515     case DW_FORM_GNU_addr_index: {
516       uint64 addr_index = reader_->ReadUnsignedLEB128(start, &len);
517       const uint8_t* addr_ptr =
518           addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize();
519       ProcessAttributeUnsigned(dieoffset, attr, form,
520                                reader_->ReadAddress(addr_ptr));
521       return start + len;
522     }
523   }
524   fprintf(stderr, "Unhandled form type\n");
525   return NULL;
526 }
527 
ProcessDIE(uint64 dieoffset,const uint8_t * start,const Abbrev & abbrev)528 const uint8_t *CompilationUnit::ProcessDIE(uint64 dieoffset,
529                                            const uint8_t *start,
530                                            const Abbrev& abbrev) {
531   for (AttributeList::const_iterator i = abbrev.attributes.begin();
532        i != abbrev.attributes.end();
533        i++)  {
534     start = ProcessAttribute(dieoffset, start, i->first, i->second);
535   }
536 
537   // If this is a compilation unit in a split DWARF object, verify that
538   // the dwo_id matches. If it does not match, we will ignore this
539   // compilation unit.
540   if (abbrev.tag == DW_TAG_compile_unit
541       && is_split_dwarf_
542       && dwo_id_ != skeleton_dwo_id_) {
543     return NULL;
544   }
545 
546   return start;
547 }
548 
ProcessDIEs()549 void CompilationUnit::ProcessDIEs() {
550   const uint8_t *dieptr = after_header_;
551   size_t len;
552 
553   // lengthstart is the place the length field is based on.
554   // It is the point in the header after the initial length field
555   const uint8_t *lengthstart = buffer_;
556 
557   // In 64 bit dwarf, the initial length is 12 bytes, because of the
558   // 0xffffffff at the start.
559   if (reader_->OffsetSize() == 8)
560     lengthstart += 12;
561   else
562     lengthstart += 4;
563 
564   std::stack<uint64> die_stack;
565 
566   while (dieptr < (lengthstart + header_.length)) {
567     // We give the user the absolute offset from the beginning of
568     // debug_info, since they need it to deal with ref_addr forms.
569     uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
570 
571     uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
572 
573     dieptr += len;
574 
575     // Abbrev == 0 represents the end of a list of children, or padding
576     // at the end of the compilation unit.
577     if (abbrev_num == 0) {
578       if (die_stack.size() == 0)
579         // If it is padding, then we are done with the compilation unit's DIEs.
580         return;
581       const uint64 offset = die_stack.top();
582       die_stack.pop();
583       handler_->EndDIE(offset);
584       continue;
585     }
586 
587     const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
588     const enum DwarfTag tag = abbrev.tag;
589     if (!handler_->StartDIE(absolute_offset, tag)) {
590       dieptr = SkipDIE(dieptr, abbrev);
591     } else {
592       dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
593     }
594 
595     if (abbrev.has_children) {
596       die_stack.push(absolute_offset);
597     } else {
598       handler_->EndDIE(absolute_offset);
599     }
600   }
601 }
602 
603 // Check for a valid ELF file and return the Address size.
604 // Returns 0 if not a valid ELF file.
GetElfWidth(const ElfReader & elf)605 inline int GetElfWidth(const ElfReader& elf) {
606   if (elf.IsElf32File())
607     return 4;
608   if (elf.IsElf64File())
609     return 8;
610   return 0;
611 }
612 
ProcessSplitDwarf()613 void CompilationUnit::ProcessSplitDwarf() {
614   struct stat statbuf;
615   if (!have_checked_for_dwp_) {
616     // Look for a .dwp file in the same directory as the executable.
617     have_checked_for_dwp_ = true;
618     string dwp_suffix(".dwp");
619     dwp_path_ = path_ + dwp_suffix;
620     if (stat(dwp_path_.c_str(), &statbuf) != 0) {
621       // Fall back to a split .debug file in the same directory.
622       string debug_suffix(".debug");
623       dwp_path_ = path_;
624       size_t found = path_.rfind(debug_suffix);
625       if (found + debug_suffix.length() == path_.length())
626         dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix);
627     }
628     if (stat(dwp_path_.c_str(), &statbuf) == 0) {
629       ElfReader* elf = new ElfReader(dwp_path_);
630       int width = GetElfWidth(*elf);
631       if (width != 0) {
632         dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness()));
633         dwp_byte_reader_->SetAddressSize(width);
634         dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf));
635         dwp_reader_->Initialize();
636       } else {
637         delete elf;
638       }
639     }
640   }
641   bool found_in_dwp = false;
642   if (dwp_reader_) {
643     // If we have a .dwp file, read the debug sections for the requested CU.
644     SectionMap sections;
645     dwp_reader_->ReadDebugSectionsForCU(dwo_id_, &sections);
646     if (!sections.empty()) {
647       found_in_dwp = true;
648       CompilationUnit dwp_comp_unit(dwp_path_, sections, 0,
649                                     dwp_byte_reader_.get(), handler_);
650       dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_,
651                                   ranges_base_, dwo_id_);
652       dwp_comp_unit.Start();
653     }
654   }
655   if (!found_in_dwp) {
656     // If no .dwp file, try to open the .dwo file.
657     if (stat(dwo_name_, &statbuf) == 0) {
658       ElfReader elf(dwo_name_);
659       int width = GetElfWidth(elf);
660       if (width != 0) {
661         ByteReader reader(ENDIANNESS_LITTLE);
662         reader.SetAddressSize(width);
663         SectionMap sections;
664         ReadDebugSectionsFromDwo(&elf, &sections);
665         CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader,
666                                       handler_);
667         dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_,
668                                     addr_base_, ranges_base_, dwo_id_);
669         dwo_comp_unit.Start();
670       }
671     }
672   }
673 }
674 
ReadDebugSectionsFromDwo(ElfReader * elf_reader,SectionMap * sections)675 void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
676                                                SectionMap* sections) {
677   static const char* const section_names[] = {
678     ".debug_abbrev",
679     ".debug_info",
680     ".debug_str_offsets",
681     ".debug_str"
682   };
683   for (unsigned int i = 0u;
684        i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
685     string base_name = section_names[i];
686     string dwo_name = base_name + ".dwo";
687     size_t section_size;
688     const char* section_data = elf_reader->GetSectionByName(dwo_name,
689                                                             &section_size);
690     if (section_data != NULL)
691       sections->insert(std::make_pair(
692           base_name, std::make_pair(
693              reinterpret_cast<const uint8_t *>(section_data),
694              section_size)));
695   }
696 }
697 
DwpReader(const ByteReader & byte_reader,ElfReader * elf_reader)698 DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
699     : elf_reader_(elf_reader), byte_reader_(byte_reader),
700       cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL),
701       string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0),
702       nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
703       offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
704       abbrev_size_(0), info_data_(NULL), info_size_(0),
705       str_offsets_data_(NULL), str_offsets_size_(0) {}
706 
~DwpReader()707 DwpReader::~DwpReader() {
708   if (elf_reader_) delete elf_reader_;
709 }
710 
Initialize()711 void DwpReader::Initialize() {
712   cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
713                                             &cu_index_size_);
714   if (cu_index_ == NULL) {
715     return;
716   }
717   // The .debug_str.dwo section is shared by all CUs in the file.
718   string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
719                                                  &string_buffer_size_);
720 
721   version_ = byte_reader_.ReadFourBytes(
722       reinterpret_cast<const uint8_t *>(cu_index_));
723 
724   if (version_ == 1) {
725     nslots_ = byte_reader_.ReadFourBytes(
726         reinterpret_cast<const uint8_t *>(cu_index_)
727         + 3 * sizeof(uint32));
728     phash_ = cu_index_ + 4 * sizeof(uint32);
729     pindex_ = phash_ + nslots_ * sizeof(uint64);
730     shndx_pool_ = pindex_ + nslots_ * sizeof(uint32);
731     if (shndx_pool_ >= cu_index_ + cu_index_size_) {
732       version_ = 0;
733     }
734   } else if (version_ == 2) {
735     ncolumns_ = byte_reader_.ReadFourBytes(
736         reinterpret_cast<const uint8_t *>(cu_index_) + sizeof(uint32));
737     nunits_ = byte_reader_.ReadFourBytes(
738         reinterpret_cast<const uint8_t *>(cu_index_) + 2 * sizeof(uint32));
739     nslots_ = byte_reader_.ReadFourBytes(
740         reinterpret_cast<const uint8_t *>(cu_index_) + 3 * sizeof(uint32));
741     phash_ = cu_index_ + 4 * sizeof(uint32);
742     pindex_ = phash_ + nslots_ * sizeof(uint64);
743     offset_table_ = pindex_ + nslots_ * sizeof(uint32);
744     size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32);
745     abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
746                                                  &abbrev_size_);
747     info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
748     str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
749                                                       &str_offsets_size_);
750     if (size_table_ >= cu_index_ + cu_index_size_) {
751       version_ = 0;
752     }
753   }
754 }
755 
ReadDebugSectionsForCU(uint64 dwo_id,SectionMap * sections)756 void DwpReader::ReadDebugSectionsForCU(uint64 dwo_id,
757                                        SectionMap* sections) {
758   if (version_ == 1) {
759     int slot = LookupCU(dwo_id);
760     if (slot == -1) {
761       return;
762     }
763 
764     // The index table points to the section index pool, where we
765     // can read a list of section indexes for the debug sections
766     // for the CU whose dwo_id we are looking for.
767     int index = byte_reader_.ReadFourBytes(
768         reinterpret_cast<const uint8_t *>(pindex_)
769         + slot * sizeof(uint32));
770     const char* shndx_list = shndx_pool_ + index * sizeof(uint32);
771     for (;;) {
772       if (shndx_list >= cu_index_ + cu_index_size_) {
773         version_ = 0;
774         return;
775       }
776       unsigned int shndx = byte_reader_.ReadFourBytes(
777           reinterpret_cast<const uint8_t *>(shndx_list));
778       shndx_list += sizeof(uint32);
779       if (shndx == 0)
780         break;
781       const char* section_name = elf_reader_->GetSectionName(shndx);
782       size_t section_size;
783       const char* section_data;
784       // We're only interested in these four debug sections.
785       // The section names in the .dwo file end with ".dwo", but we
786       // add them to the sections table with their normal names.
787       if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
788         section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
789         sections->insert(std::make_pair(
790             ".debug_abbrev",
791             std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
792                                                               section_size)));
793       } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
794         section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
795         sections->insert(std::make_pair(
796             ".debug_info",
797             std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
798                            section_size)));
799       } else if (!strncmp(section_name, ".debug_str_offsets",
800                           strlen(".debug_str_offsets"))) {
801         section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
802         sections->insert(std::make_pair(
803             ".debug_str_offsets",
804             std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
805                            section_size)));
806       }
807     }
808     sections->insert(std::make_pair(
809         ".debug_str",
810         std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
811                        string_buffer_size_)));
812   } else if (version_ == 2) {
813     uint32 index = LookupCUv2(dwo_id);
814     if (index == 0) {
815       return;
816     }
817 
818     // The index points to a row in each of the section offsets table
819     // and the section size table, where we can read the offsets and sizes
820     // of the contributions to each debug section from the CU whose dwo_id
821     // we are looking for. Row 0 of the section offsets table has the
822     // section ids for each column of the table. The size table begins
823     // with row 1.
824     const char* id_row = offset_table_;
825     const char* offset_row = offset_table_
826                              + index * ncolumns_ * sizeof(uint32);
827     const char* size_row =
828         size_table_ + (index - 1) * ncolumns_ * sizeof(uint32);
829     if (size_row + ncolumns_ * sizeof(uint32) > cu_index_ + cu_index_size_) {
830       version_ = 0;
831       return;
832     }
833     for (unsigned int col = 0u; col < ncolumns_; ++col) {
834       uint32 section_id =
835           byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t *>(id_row)
836                                      + col * sizeof(uint32));
837       uint32 offset = byte_reader_.ReadFourBytes(
838           reinterpret_cast<const uint8_t *>(offset_row)
839           + col * sizeof(uint32));
840       uint32 size = byte_reader_.ReadFourBytes(
841           reinterpret_cast<const uint8_t *>(size_row) + col * sizeof(uint32));
842       if (section_id == DW_SECT_ABBREV) {
843         sections->insert(std::make_pair(
844             ".debug_abbrev",
845             std::make_pair(reinterpret_cast<const uint8_t *> (abbrev_data_)
846                            + offset, size)));
847       } else if (section_id == DW_SECT_INFO) {
848         sections->insert(std::make_pair(
849             ".debug_info",
850             std::make_pair(reinterpret_cast<const uint8_t *> (info_data_)
851                            + offset, size)));
852       } else if (section_id == DW_SECT_STR_OFFSETS) {
853         sections->insert(std::make_pair(
854             ".debug_str_offsets",
855             std::make_pair(reinterpret_cast<const uint8_t *> (str_offsets_data_)
856                            + offset, size)));
857       }
858     }
859     sections->insert(std::make_pair(
860         ".debug_str",
861         std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
862                        string_buffer_size_)));
863   }
864 }
865 
LookupCU(uint64 dwo_id)866 int DwpReader::LookupCU(uint64 dwo_id) {
867   uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1);
868   uint64 probe = byte_reader_.ReadEightBytes(
869       reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
870   if (probe != 0 && probe != dwo_id) {
871     uint32 secondary_hash =
872         (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1;
873     do {
874       slot = (slot + secondary_hash) & (nslots_ - 1);
875       probe = byte_reader_.ReadEightBytes(
876           reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
877     } while (probe != 0 && probe != dwo_id);
878   }
879   if (probe == 0)
880     return -1;
881   return slot;
882 }
883 
LookupCUv2(uint64 dwo_id)884 uint32 DwpReader::LookupCUv2(uint64 dwo_id) {
885   uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1);
886   uint64 probe = byte_reader_.ReadEightBytes(
887       reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
888   uint32 index = byte_reader_.ReadFourBytes(
889       reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32));
890   if (index != 0 && probe != dwo_id) {
891     uint32 secondary_hash =
892         (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1;
893     do {
894       slot = (slot + secondary_hash) & (nslots_ - 1);
895       probe = byte_reader_.ReadEightBytes(
896           reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64));
897       index = byte_reader_.ReadFourBytes(
898           reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32));
899     } while (index != 0 && probe != dwo_id);
900   }
901   return index;
902 }
903 
LineInfo(const uint8_t * buffer,uint64 buffer_length,ByteReader * reader,LineInfoHandler * handler)904 LineInfo::LineInfo(const uint8_t *buffer, uint64 buffer_length,
905                    ByteReader* reader, LineInfoHandler* handler):
906     handler_(handler), reader_(reader), buffer_(buffer) {
907 #ifndef NDEBUG
908   buffer_length_ = buffer_length;
909 #endif
910   header_.std_opcode_lengths = NULL;
911 }
912 
Start()913 uint64 LineInfo::Start() {
914   ReadHeader();
915   ReadLines();
916   return after_header_ - buffer_;
917 }
918 
919 // The header for a debug_line section is mildly complicated, because
920 // the line info is very tightly encoded.
ReadHeader()921 void LineInfo::ReadHeader() {
922   const uint8_t *lineptr = buffer_;
923   size_t initial_length_size;
924 
925   const uint64 initial_length
926     = reader_->ReadInitialLength(lineptr, &initial_length_size);
927 
928   lineptr += initial_length_size;
929   header_.total_length = initial_length;
930   assert(buffer_ + initial_length_size + header_.total_length <=
931         buffer_ + buffer_length_);
932 
933   // Address size *must* be set by CU ahead of time.
934   assert(reader_->AddressSize() != 0);
935 
936   header_.version = reader_->ReadTwoBytes(lineptr);
937   lineptr += 2;
938 
939   header_.prologue_length = reader_->ReadOffset(lineptr);
940   lineptr += reader_->OffsetSize();
941 
942   header_.min_insn_length = reader_->ReadOneByte(lineptr);
943   lineptr += 1;
944 
945   if (header_.version >= 4) {
946     __attribute__((unused)) uint8 max_ops_per_insn =
947         reader_->ReadOneByte(lineptr);
948     ++lineptr;
949     assert(max_ops_per_insn == 1);
950   }
951 
952   header_.default_is_stmt = reader_->ReadOneByte(lineptr);
953   lineptr += 1;
954 
955   header_.line_base = *reinterpret_cast<const int8*>(lineptr);
956   lineptr += 1;
957 
958   header_.line_range = reader_->ReadOneByte(lineptr);
959   lineptr += 1;
960 
961   header_.opcode_base = reader_->ReadOneByte(lineptr);
962   lineptr += 1;
963 
964   header_.std_opcode_lengths = new std::vector<unsigned char>;
965   header_.std_opcode_lengths->resize(header_.opcode_base + 1);
966   (*header_.std_opcode_lengths)[0] = 0;
967   for (int i = 1; i < header_.opcode_base; i++) {
968     (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
969     lineptr += 1;
970   }
971 
972   // It is legal for the directory entry table to be empty.
973   if (*lineptr) {
974     uint32 dirindex = 1;
975     while (*lineptr) {
976       const char *dirname = reinterpret_cast<const char *>(lineptr);
977       handler_->DefineDir(dirname, dirindex);
978       lineptr += strlen(dirname) + 1;
979       dirindex++;
980     }
981   }
982   lineptr++;
983 
984   // It is also legal for the file entry table to be empty.
985   if (*lineptr) {
986     uint32 fileindex = 1;
987     size_t len;
988     while (*lineptr) {
989       const char *filename = reinterpret_cast<const char *>(lineptr);
990       lineptr += strlen(filename) + 1;
991 
992       uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
993       lineptr += len;
994 
995       uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
996       lineptr += len;
997 
998       uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
999       lineptr += len;
1000       handler_->DefineFile(filename, fileindex, static_cast<uint32>(dirindex),
1001                            mod_time, filelength);
1002       fileindex++;
1003     }
1004   }
1005   lineptr++;
1006 
1007   after_header_ = lineptr;
1008 }
1009 
1010 /* static */
ProcessOneOpcode(ByteReader * reader,LineInfoHandler * handler,const struct LineInfoHeader & header,const uint8_t * start,struct LineStateMachine * lsm,size_t * len,uintptr pc,bool * lsm_passes_pc)1011 bool LineInfo::ProcessOneOpcode(ByteReader* reader,
1012                                 LineInfoHandler* handler,
1013                                 const struct LineInfoHeader &header,
1014                                 const uint8_t *start,
1015                                 struct LineStateMachine* lsm,
1016                                 size_t* len,
1017                                 uintptr pc,
1018                                 bool *lsm_passes_pc) {
1019   size_t oplen = 0;
1020   size_t templen;
1021   uint8 opcode = reader->ReadOneByte(start);
1022   oplen++;
1023   start++;
1024 
1025   // If the opcode is great than the opcode_base, it is a special
1026   // opcode. Most line programs consist mainly of special opcodes.
1027   if (opcode >= header.opcode_base) {
1028     opcode -= header.opcode_base;
1029     const int64 advance_address = (opcode / header.line_range)
1030                                   * header.min_insn_length;
1031     const int32 advance_line = (opcode % header.line_range)
1032                                + header.line_base;
1033 
1034     // Check if the lsm passes "pc". If so, mark it as passed.
1035     if (lsm_passes_pc &&
1036         lsm->address <= pc && pc < lsm->address + advance_address) {
1037       *lsm_passes_pc = true;
1038     }
1039 
1040     lsm->address += advance_address;
1041     lsm->line_num += advance_line;
1042     lsm->basic_block = true;
1043     *len = oplen;
1044     return true;
1045   }
1046 
1047   // Otherwise, we have the regular opcodes
1048   switch (opcode) {
1049     case DW_LNS_copy: {
1050       lsm->basic_block = false;
1051       *len = oplen;
1052       return true;
1053     }
1054 
1055     case DW_LNS_advance_pc: {
1056       uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen);
1057       oplen += templen;
1058 
1059       // Check if the lsm passes "pc". If so, mark it as passed.
1060       if (lsm_passes_pc && lsm->address <= pc &&
1061           pc < lsm->address + header.min_insn_length * advance_address) {
1062         *lsm_passes_pc = true;
1063       }
1064 
1065       lsm->address += header.min_insn_length * advance_address;
1066     }
1067       break;
1068     case DW_LNS_advance_line: {
1069       const int64 advance_line = reader->ReadSignedLEB128(start, &templen);
1070       oplen += templen;
1071       lsm->line_num += static_cast<int32>(advance_line);
1072 
1073       // With gcc 4.2.1, we can get the line_no here for the first time
1074       // since DW_LNS_advance_line is called after DW_LNE_set_address is
1075       // called. So we check if the lsm passes "pc" here, not in
1076       // DW_LNE_set_address.
1077       if (lsm_passes_pc && lsm->address == pc) {
1078         *lsm_passes_pc = true;
1079       }
1080     }
1081       break;
1082     case DW_LNS_set_file: {
1083       const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen);
1084       oplen += templen;
1085       lsm->file_num = static_cast<uint32>(fileno);
1086     }
1087       break;
1088     case DW_LNS_set_column: {
1089       const uint64 colno = reader->ReadUnsignedLEB128(start, &templen);
1090       oplen += templen;
1091       lsm->column_num = static_cast<uint32>(colno);
1092     }
1093       break;
1094     case DW_LNS_negate_stmt: {
1095       lsm->is_stmt = !lsm->is_stmt;
1096     }
1097       break;
1098     case DW_LNS_set_basic_block: {
1099       lsm->basic_block = true;
1100     }
1101       break;
1102     case DW_LNS_fixed_advance_pc: {
1103       const uint16 advance_address = reader->ReadTwoBytes(start);
1104       oplen += 2;
1105 
1106       // Check if the lsm passes "pc". If so, mark it as passed.
1107       if (lsm_passes_pc &&
1108           lsm->address <= pc && pc < lsm->address + advance_address) {
1109         *lsm_passes_pc = true;
1110       }
1111 
1112       lsm->address += advance_address;
1113     }
1114       break;
1115     case DW_LNS_const_add_pc: {
1116       const int64 advance_address = header.min_insn_length
1117                                     * ((255 - header.opcode_base)
1118                                        / header.line_range);
1119 
1120       // Check if the lsm passes "pc". If so, mark it as passed.
1121       if (lsm_passes_pc &&
1122           lsm->address <= pc && pc < lsm->address + advance_address) {
1123         *lsm_passes_pc = true;
1124       }
1125 
1126       lsm->address += advance_address;
1127     }
1128       break;
1129     case DW_LNS_extended_op: {
1130       const uint64 extended_op_len = reader->ReadUnsignedLEB128(start,
1131                                                                 &templen);
1132       start += templen;
1133       oplen += templen + extended_op_len;
1134 
1135       const uint64 extended_op = reader->ReadOneByte(start);
1136       start++;
1137 
1138       switch (extended_op) {
1139         case DW_LNE_end_sequence: {
1140           lsm->end_sequence = true;
1141           *len = oplen;
1142           return true;
1143         }
1144           break;
1145         case DW_LNE_set_address: {
1146           // With gcc 4.2.1, we cannot tell the line_no here since
1147           // DW_LNE_set_address is called before DW_LNS_advance_line is
1148           // called.  So we do not check if the lsm passes "pc" here.  See
1149           // also the comment in DW_LNS_advance_line.
1150           uint64 address = reader->ReadAddress(start);
1151           lsm->address = address;
1152         }
1153           break;
1154         case DW_LNE_define_file: {
1155           const char *filename = reinterpret_cast<const char *>(start);
1156 
1157           templen = strlen(filename) + 1;
1158           start += templen;
1159 
1160           uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen);
1161           oplen += templen;
1162 
1163           const uint64 mod_time = reader->ReadUnsignedLEB128(start,
1164                                                              &templen);
1165           oplen += templen;
1166 
1167           const uint64 filelength = reader->ReadUnsignedLEB128(start,
1168                                                                &templen);
1169           oplen += templen;
1170 
1171           if (handler) {
1172             handler->DefineFile(filename, -1, static_cast<uint32>(dirindex),
1173                                 mod_time, filelength);
1174           }
1175         }
1176           break;
1177       }
1178     }
1179       break;
1180 
1181     default: {
1182       // Ignore unknown opcode  silently
1183       if (header.std_opcode_lengths) {
1184         for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
1185           reader->ReadUnsignedLEB128(start, &templen);
1186           start += templen;
1187           oplen += templen;
1188         }
1189       }
1190     }
1191       break;
1192   }
1193   *len = oplen;
1194   return false;
1195 }
1196 
ReadLines()1197 void LineInfo::ReadLines() {
1198   struct LineStateMachine lsm;
1199 
1200   // lengthstart is the place the length field is based on.
1201   // It is the point in the header after the initial length field
1202   const uint8_t *lengthstart = buffer_;
1203 
1204   // In 64 bit dwarf, the initial length is 12 bytes, because of the
1205   // 0xffffffff at the start.
1206   if (reader_->OffsetSize() == 8)
1207     lengthstart += 12;
1208   else
1209     lengthstart += 4;
1210 
1211   const uint8_t *lineptr = after_header_;
1212   lsm.Reset(header_.default_is_stmt);
1213 
1214   // The LineInfoHandler interface expects each line's length along
1215   // with its address, but DWARF only provides addresses (sans
1216   // length), and an end-of-sequence address; one infers the length
1217   // from the next address. So we report a line only when we get the
1218   // next line's address, or the end-of-sequence address.
1219   bool have_pending_line = false;
1220   uint64 pending_address = 0;
1221   uint32 pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
1222 
1223   while (lineptr < lengthstart + header_.total_length) {
1224     size_t oplength;
1225     bool add_row = ProcessOneOpcode(reader_, handler_, header_,
1226                                     lineptr, &lsm, &oplength, (uintptr)-1,
1227                                     NULL);
1228     if (add_row) {
1229       if (have_pending_line)
1230         handler_->AddLine(pending_address, lsm.address - pending_address,
1231                           pending_file_num, pending_line_num,
1232                           pending_column_num);
1233       if (lsm.end_sequence) {
1234         lsm.Reset(header_.default_is_stmt);
1235         have_pending_line = false;
1236       } else {
1237         pending_address = lsm.address;
1238         pending_file_num = lsm.file_num;
1239         pending_line_num = lsm.line_num;
1240         pending_column_num = lsm.column_num;
1241         have_pending_line = true;
1242       }
1243     }
1244     lineptr += oplength;
1245   }
1246 
1247   after_header_ = lengthstart + header_.total_length;
1248 }
1249 
RangeListReader(const uint8_t * buffer,uint64 size,ByteReader * reader,RangeListHandler * handler)1250 RangeListReader::RangeListReader(const uint8_t *buffer, uint64 size,
1251                                  ByteReader *reader, RangeListHandler *handler)
1252     : buffer_(buffer), size_(size), reader_(reader), handler_(handler) { }
1253 
ReadRangeList(uint64 offset)1254 bool RangeListReader::ReadRangeList(uint64 offset) {
1255   const uint64 max_address =
1256     (reader_->AddressSize() == 4) ? 0xffffffffUL
1257                                   : 0xffffffffffffffffULL;
1258   const uint64 entry_size = reader_->AddressSize() * 2;
1259   bool list_end = false;
1260 
1261   do {
1262     if (offset > size_ - entry_size) {
1263       return false; // Invalid range detected
1264     }
1265 
1266     uint64 start_address = reader_->ReadAddress(buffer_ + offset);
1267     uint64 end_address =
1268       reader_->ReadAddress(buffer_ + offset + reader_->AddressSize());
1269 
1270     if (start_address == max_address) { // Base address selection
1271       handler_->SetBaseAddress(end_address);
1272     } else if (start_address == 0 && end_address == 0) { // End-of-list
1273       handler_->Finish();
1274       list_end = true;
1275     } else { // Add a range entry
1276       handler_->AddRange(start_address, end_address);
1277     }
1278 
1279     offset += entry_size;
1280   } while (!list_end);
1281 
1282   return true;
1283 }
1284 
1285 // A DWARF rule for recovering the address or value of a register, or
1286 // computing the canonical frame address. There is one subclass of this for
1287 // each '*Rule' member function in CallFrameInfo::Handler.
1288 //
1289 // It's annoying that we have to handle Rules using pointers (because
1290 // the concrete instances can have an arbitrary size). They're small,
1291 // so it would be much nicer if we could just handle them by value
1292 // instead of fretting about ownership and destruction.
1293 //
1294 // It seems like all these could simply be instances of std::tr1::bind,
1295 // except that we need instances to be EqualityComparable, too.
1296 //
1297 // This could logically be nested within State, but then the qualified names
1298 // get horrendous.
1299 class CallFrameInfo::Rule {
1300  public:
~Rule()1301   virtual ~Rule() { }
1302 
1303   // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using
1304   // this rule. If REG is kCFARegister, then this rule describes how to compute
1305   // the canonical frame address. Return what the HANDLER member function
1306   // returned.
1307   virtual bool Handle(Handler *handler,
1308                       uint64 address, int reg) const = 0;
1309 
1310   // Equality on rules. We use these to decide which rules we need
1311   // to report after a DW_CFA_restore_state instruction.
1312   virtual bool operator==(const Rule &rhs) const = 0;
1313 
operator !=(const Rule & rhs) const1314   bool operator!=(const Rule &rhs) const { return ! (*this == rhs); }
1315 
1316   // Return a pointer to a copy of this rule.
1317   virtual Rule *Copy() const = 0;
1318 
1319   // If this is a base+offset rule, change its base register to REG.
1320   // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
SetBaseRegister(unsigned reg)1321   virtual void SetBaseRegister(unsigned reg) { }
1322 
1323   // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
1324   // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
SetOffset(long long offset)1325   virtual void SetOffset(long long offset) { }
1326 };
1327 
1328 // Rule: the value the register had in the caller cannot be recovered.
1329 class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
1330  public:
UndefinedRule()1331   UndefinedRule() { }
~UndefinedRule()1332   ~UndefinedRule() { }
Handle(Handler * handler,uint64 address,int reg) const1333   bool Handle(Handler *handler, uint64 address, int reg) const {
1334     return handler->UndefinedRule(address, reg);
1335   }
operator ==(const Rule & rhs) const1336   bool operator==(const Rule &rhs) const {
1337     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1338     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1339     const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs);
1340     return (our_rhs != NULL);
1341   }
Copy() const1342   Rule *Copy() const { return new UndefinedRule(*this); }
1343 };
1344 
1345 // Rule: the register's value is the same as that it had in the caller.
1346 class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
1347  public:
SameValueRule()1348   SameValueRule() { }
~SameValueRule()1349   ~SameValueRule() { }
Handle(Handler * handler,uint64 address,int reg) const1350   bool Handle(Handler *handler, uint64 address, int reg) const {
1351     return handler->SameValueRule(address, reg);
1352   }
operator ==(const Rule & rhs) const1353   bool operator==(const Rule &rhs) const {
1354     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1355     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1356     const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs);
1357     return (our_rhs != NULL);
1358   }
Copy() const1359   Rule *Copy() const { return new SameValueRule(*this); }
1360 };
1361 
1362 // Rule: the register is saved at OFFSET from BASE_REGISTER.  BASE_REGISTER
1363 // may be CallFrameInfo::Handler::kCFARegister.
1364 class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
1365  public:
OffsetRule(int base_register,long offset)1366   OffsetRule(int base_register, long offset)
1367       : base_register_(base_register), offset_(offset) { }
~OffsetRule()1368   ~OffsetRule() { }
Handle(Handler * handler,uint64 address,int reg) const1369   bool Handle(Handler *handler, uint64 address, int reg) const {
1370     return handler->OffsetRule(address, reg, base_register_, offset_);
1371   }
operator ==(const Rule & rhs) const1372   bool operator==(const Rule &rhs) const {
1373     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1374     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1375     const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs);
1376     return (our_rhs &&
1377             base_register_ == our_rhs->base_register_ &&
1378             offset_ == our_rhs->offset_);
1379   }
Copy() const1380   Rule *Copy() const { return new OffsetRule(*this); }
1381   // We don't actually need SetBaseRegister or SetOffset here, since they
1382   // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
1383   // doesn't make sense to use OffsetRule for computing the CFA: it
1384   // computes the address at which a register is saved, not a value.
1385  private:
1386   int base_register_;
1387   long offset_;
1388 };
1389 
1390 // Rule: the value the register had in the caller is the value of
1391 // BASE_REGISTER plus offset. BASE_REGISTER may be
1392 // CallFrameInfo::Handler::kCFARegister.
1393 class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
1394  public:
ValOffsetRule(int base_register,long offset)1395   ValOffsetRule(int base_register, long offset)
1396       : base_register_(base_register), offset_(offset) { }
~ValOffsetRule()1397   ~ValOffsetRule() { }
Handle(Handler * handler,uint64 address,int reg) const1398   bool Handle(Handler *handler, uint64 address, int reg) const {
1399     return handler->ValOffsetRule(address, reg, base_register_, offset_);
1400   }
operator ==(const Rule & rhs) const1401   bool operator==(const Rule &rhs) const {
1402     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1403     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1404     const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs);
1405     return (our_rhs &&
1406             base_register_ == our_rhs->base_register_ &&
1407             offset_ == our_rhs->offset_);
1408   }
Copy() const1409   Rule *Copy() const { return new ValOffsetRule(*this); }
SetBaseRegister(unsigned reg)1410   void SetBaseRegister(unsigned reg) { base_register_ = reg; }
SetOffset(long long offset)1411   void SetOffset(long long offset) { offset_ = offset; }
1412  private:
1413   int base_register_;
1414   long offset_;
1415 };
1416 
1417 // Rule: the register has been saved in another register REGISTER_NUMBER_.
1418 class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
1419  public:
RegisterRule(int register_number)1420   explicit RegisterRule(int register_number)
1421       : register_number_(register_number) { }
~RegisterRule()1422   ~RegisterRule() { }
Handle(Handler * handler,uint64 address,int reg) const1423   bool Handle(Handler *handler, uint64 address, int reg) const {
1424     return handler->RegisterRule(address, reg, register_number_);
1425   }
operator ==(const Rule & rhs) const1426   bool operator==(const Rule &rhs) const {
1427     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1428     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1429     const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs);
1430     return (our_rhs && register_number_ == our_rhs->register_number_);
1431   }
Copy() const1432   Rule *Copy() const { return new RegisterRule(*this); }
1433  private:
1434   int register_number_;
1435 };
1436 
1437 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1438 class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
1439  public:
ExpressionRule(const string & expression)1440   explicit ExpressionRule(const string &expression)
1441       : expression_(expression) { }
~ExpressionRule()1442   ~ExpressionRule() { }
Handle(Handler * handler,uint64 address,int reg) const1443   bool Handle(Handler *handler, uint64 address, int reg) const {
1444     return handler->ExpressionRule(address, reg, expression_);
1445   }
operator ==(const Rule & rhs) const1446   bool operator==(const Rule &rhs) const {
1447     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1448     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1449     const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs);
1450     return (our_rhs && expression_ == our_rhs->expression_);
1451   }
Copy() const1452   Rule *Copy() const { return new ExpressionRule(*this); }
1453  private:
1454   string expression_;
1455 };
1456 
1457 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1458 class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
1459  public:
ValExpressionRule(const string & expression)1460   explicit ValExpressionRule(const string &expression)
1461       : expression_(expression) { }
~ValExpressionRule()1462   ~ValExpressionRule() { }
Handle(Handler * handler,uint64 address,int reg) const1463   bool Handle(Handler *handler, uint64 address, int reg) const {
1464     return handler->ValExpressionRule(address, reg, expression_);
1465   }
operator ==(const Rule & rhs) const1466   bool operator==(const Rule &rhs) const {
1467     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1468     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1469     const ValExpressionRule *our_rhs =
1470         dynamic_cast<const ValExpressionRule *>(&rhs);
1471     return (our_rhs && expression_ == our_rhs->expression_);
1472   }
Copy() const1473   Rule *Copy() const { return new ValExpressionRule(*this); }
1474  private:
1475   string expression_;
1476 };
1477 
1478 // A map from register numbers to rules.
1479 class CallFrameInfo::RuleMap {
1480  public:
RuleMap()1481   RuleMap() : cfa_rule_(NULL) { }
RuleMap(const RuleMap & rhs)1482   RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; }
~RuleMap()1483   ~RuleMap() { Clear(); }
1484 
1485   RuleMap &operator=(const RuleMap &rhs);
1486 
1487   // Set the rule for computing the CFA to RULE. Take ownership of RULE.
SetCFARule(Rule * rule)1488   void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; }
1489 
1490   // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
1491   // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
1492   // DW_CFA_def_cfa_register, and for detecting references to the CFA before
1493   // a rule for it has been established.
CFARule() const1494   Rule *CFARule() const { return cfa_rule_; }
1495 
1496   // Return the rule for REG, or NULL if there is none. The caller takes
1497   // ownership of the result.
1498   Rule *RegisterRule(int reg) const;
1499 
1500   // Set the rule for computing REG to RULE. Take ownership of RULE.
1501   void SetRegisterRule(int reg, Rule *rule);
1502 
1503   // Make all the appropriate calls to HANDLER as if we were changing from
1504   // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
1505   // DW_CFA_restore_state, where lots of rules can change simultaneously.
1506   // Return true if all handlers returned true; otherwise, return false.
1507   bool HandleTransitionTo(Handler *handler, uint64 address,
1508                           const RuleMap &new_rules) const;
1509 
1510  private:
1511   // A map from register numbers to Rules.
1512   typedef std::map<int, Rule *> RuleByNumber;
1513 
1514   // Remove all register rules and clear cfa_rule_.
1515   void Clear();
1516 
1517   // The rule for computing the canonical frame address. This RuleMap owns
1518   // this rule.
1519   Rule *cfa_rule_;
1520 
1521   // A map from register numbers to postfix expressions to recover
1522   // their values. This RuleMap owns the Rules the map refers to.
1523   RuleByNumber registers_;
1524 };
1525 
operator =(const RuleMap & rhs)1526 CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) {
1527   Clear();
1528   // Since each map owns the rules it refers to, assignment must copy them.
1529   if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
1530   for (RuleByNumber::const_iterator it = rhs.registers_.begin();
1531        it != rhs.registers_.end(); it++)
1532     registers_[it->first] = it->second->Copy();
1533   return *this;
1534 }
1535 
RegisterRule(int reg) const1536 CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const {
1537   assert(reg != Handler::kCFARegister);
1538   RuleByNumber::const_iterator it = registers_.find(reg);
1539   if (it != registers_.end())
1540     return it->second->Copy();
1541   else
1542     return NULL;
1543 }
1544 
SetRegisterRule(int reg,Rule * rule)1545 void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) {
1546   assert(reg != Handler::kCFARegister);
1547   assert(rule);
1548   Rule **slot = &registers_[reg];
1549   delete *slot;
1550   *slot = rule;
1551 }
1552 
HandleTransitionTo(Handler * handler,uint64 address,const RuleMap & new_rules) const1553 bool CallFrameInfo::RuleMap::HandleTransitionTo(
1554     Handler *handler,
1555     uint64 address,
1556     const RuleMap &new_rules) const {
1557   // Transition from cfa_rule_ to new_rules.cfa_rule_.
1558   if (cfa_rule_ && new_rules.cfa_rule_) {
1559     if (*cfa_rule_ != *new_rules.cfa_rule_ &&
1560         !new_rules.cfa_rule_->Handle(handler, address,
1561                                      Handler::kCFARegister))
1562       return false;
1563   } else if (cfa_rule_) {
1564     // this RuleMap has a CFA rule but new_rules doesn't.
1565     // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
1566     // it's garbage input. The instruction interpreter should have
1567     // detected this and warned, so take no action here.
1568   } else if (new_rules.cfa_rule_) {
1569     // This shouldn't be possible: NEW_RULES is some prior state, and
1570     // there's no way to remove entries.
1571     assert(0);
1572   } else {
1573     // Both CFA rules are empty.  No action needed.
1574   }
1575 
1576   // Traverse the two maps in order by register number, and report
1577   // whatever differences we find.
1578   RuleByNumber::const_iterator old_it = registers_.begin();
1579   RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
1580   while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
1581     if (old_it->first < new_it->first) {
1582       // This RuleMap has an entry for old_it->first, but NEW_RULES
1583       // doesn't.
1584       //
1585       // This isn't really the right thing to do, but since CFI generally
1586       // only mentions callee-saves registers, and GCC's convention for
1587       // callee-saves registers is that they are unchanged, it's a good
1588       // approximation.
1589       if (!handler->SameValueRule(address, old_it->first))
1590         return false;
1591       old_it++;
1592     } else if (old_it->first > new_it->first) {
1593       // NEW_RULES has entry for new_it->first, but this RuleMap
1594       // doesn't. This shouldn't be possible: NEW_RULES is some prior
1595       // state, and there's no way to remove entries.
1596       assert(0);
1597     } else {
1598       // Both maps have an entry for this register. Report the new
1599       // rule if it is different.
1600       if (*old_it->second != *new_it->second &&
1601           !new_it->second->Handle(handler, address, new_it->first))
1602         return false;
1603       new_it++, old_it++;
1604     }
1605   }
1606   // Finish off entries from this RuleMap with no counterparts in new_rules.
1607   while (old_it != registers_.end()) {
1608     if (!handler->SameValueRule(address, old_it->first))
1609       return false;
1610     old_it++;
1611   }
1612   // Since we only make transitions from a rule set to some previously
1613   // saved rule set, and we can only add rules to the map, NEW_RULES
1614   // must have fewer rules than *this.
1615   assert(new_it == new_rules.registers_.end());
1616 
1617   return true;
1618 }
1619 
1620 // Remove all register rules and clear cfa_rule_.
Clear()1621 void CallFrameInfo::RuleMap::Clear() {
1622   delete cfa_rule_;
1623   cfa_rule_ = NULL;
1624   for (RuleByNumber::iterator it = registers_.begin();
1625        it != registers_.end(); it++)
1626     delete it->second;
1627   registers_.clear();
1628 }
1629 
1630 // The state of the call frame information interpreter as it processes
1631 // instructions from a CIE and FDE.
1632 class CallFrameInfo::State {
1633  public:
1634   // Create a call frame information interpreter state with the given
1635   // reporter, reader, handler, and initial call frame info address.
State(ByteReader * reader,Handler * handler,Reporter * reporter,uint64 address)1636   State(ByteReader *reader, Handler *handler, Reporter *reporter,
1637         uint64 address)
1638       : reader_(reader), handler_(handler), reporter_(reporter),
1639         address_(address), entry_(NULL), cursor_(NULL) { }
1640 
1641   // Interpret instructions from CIE, save the resulting rule set for
1642   // DW_CFA_restore instructions, and return true. On error, report
1643   // the problem to reporter_ and return false.
1644   bool InterpretCIE(const CIE &cie);
1645 
1646   // Interpret instructions from FDE, and return true. On error,
1647   // report the problem to reporter_ and return false.
1648   bool InterpretFDE(const FDE &fde);
1649 
1650  private:
1651   // The operands of a CFI instruction, for ParseOperands.
1652   struct Operands {
1653     unsigned register_number;  // A register number.
1654     uint64 offset;             // An offset or address.
1655     long signed_offset;        // A signed offset.
1656     string expression;         // A DWARF expression.
1657   };
1658 
1659   // Parse CFI instruction operands from STATE's instruction stream as
1660   // described by FORMAT. On success, populate OPERANDS with the
1661   // results, and return true. On failure, report the problem and
1662   // return false.
1663   //
1664   // Each character of FORMAT should be one of the following:
1665   //
1666   //   'r'  unsigned LEB128 register number (OPERANDS->register_number)
1667   //   'o'  unsigned LEB128 offset          (OPERANDS->offset)
1668   //   's'  signed LEB128 offset            (OPERANDS->signed_offset)
1669   //   'a'  machine-size address            (OPERANDS->offset)
1670   //        (If the CIE has a 'z' augmentation string, 'a' uses the
1671   //        encoding specified by the 'R' argument.)
1672   //   '1'  a one-byte offset               (OPERANDS->offset)
1673   //   '2'  a two-byte offset               (OPERANDS->offset)
1674   //   '4'  a four-byte offset              (OPERANDS->offset)
1675   //   '8'  an eight-byte offset            (OPERANDS->offset)
1676   //   'e'  a DW_FORM_block holding a       (OPERANDS->expression)
1677   //        DWARF expression
1678   bool ParseOperands(const char *format, Operands *operands);
1679 
1680   // Interpret one CFI instruction from STATE's instruction stream, update
1681   // STATE, report any rule changes to handler_, and return true. On
1682   // failure, report the problem and return false.
1683   bool DoInstruction();
1684 
1685   // The following Do* member functions are subroutines of DoInstruction,
1686   // factoring out the actual work of operations that have several
1687   // different encodings.
1688 
1689   // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
1690   // return true. On failure, report and return false. (Used for
1691   // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
1692   bool DoDefCFA(unsigned base_register, long offset);
1693 
1694   // Change the offset of the CFA rule to OFFSET, and return true. On
1695   // failure, report and return false. (Subroutine for
1696   // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
1697   bool DoDefCFAOffset(long offset);
1698 
1699   // Specify that REG can be recovered using RULE, and return true. On
1700   // failure, report and return false.
1701   bool DoRule(unsigned reg, Rule *rule);
1702 
1703   // Specify that REG can be found at OFFSET from the CFA, and return true.
1704   // On failure, report and return false. (Subroutine for DW_CFA_offset,
1705   // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
1706   bool DoOffset(unsigned reg, long offset);
1707 
1708   // Specify that the caller's value for REG is the CFA plus OFFSET,
1709   // and return true. On failure, report and return false. (Subroutine
1710   // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
1711   bool DoValOffset(unsigned reg, long offset);
1712 
1713   // Restore REG to the rule established in the CIE, and return true. On
1714   // failure, report and return false. (Subroutine for DW_CFA_restore and
1715   // DW_CFA_restore_extended.)
1716   bool DoRestore(unsigned reg);
1717 
1718   // Return the section offset of the instruction at cursor. For use
1719   // in error messages.
CursorOffset()1720   uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
1721 
1722   // Report that entry_ is incomplete, and return false. For brevity.
ReportIncomplete()1723   bool ReportIncomplete() {
1724     reporter_->Incomplete(entry_->offset, entry_->kind);
1725     return false;
1726   }
1727 
1728   // For reading multi-byte values with the appropriate endianness.
1729   ByteReader *reader_;
1730 
1731   // The handler to which we should report the data we find.
1732   Handler *handler_;
1733 
1734   // For reporting problems in the info we're parsing.
1735   Reporter *reporter_;
1736 
1737   // The code address to which the next instruction in the stream applies.
1738   uint64 address_;
1739 
1740   // The entry whose instructions we are currently processing. This is
1741   // first a CIE, and then an FDE.
1742   const Entry *entry_;
1743 
1744   // The next instruction to process.
1745   const uint8_t *cursor_;
1746 
1747   // The current set of rules.
1748   RuleMap rules_;
1749 
1750   // The set of rules established by the CIE, used by DW_CFA_restore
1751   // and DW_CFA_restore_extended. We set this after interpreting the
1752   // CIE's instructions.
1753   RuleMap cie_rules_;
1754 
1755   // A stack of saved states, for DW_CFA_remember_state and
1756   // DW_CFA_restore_state.
1757   std::stack<RuleMap> saved_rules_;
1758 };
1759 
InterpretCIE(const CIE & cie)1760 bool CallFrameInfo::State::InterpretCIE(const CIE &cie) {
1761   entry_ = &cie;
1762   cursor_ = entry_->instructions;
1763   while (cursor_ < entry_->end)
1764     if (!DoInstruction())
1765       return false;
1766   // Note the rules established by the CIE, for use by DW_CFA_restore
1767   // and DW_CFA_restore_extended.
1768   cie_rules_ = rules_;
1769   return true;
1770 }
1771 
InterpretFDE(const FDE & fde)1772 bool CallFrameInfo::State::InterpretFDE(const FDE &fde) {
1773   entry_ = &fde;
1774   cursor_ = entry_->instructions;
1775   while (cursor_ < entry_->end)
1776     if (!DoInstruction())
1777       return false;
1778   return true;
1779 }
1780 
ParseOperands(const char * format,Operands * operands)1781 bool CallFrameInfo::State::ParseOperands(const char *format,
1782                                          Operands *operands) {
1783   size_t len;
1784   const char *operand;
1785 
1786   for (operand = format; *operand; operand++) {
1787     size_t bytes_left = entry_->end - cursor_;
1788     switch (*operand) {
1789       case 'r':
1790         operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
1791         if (len > bytes_left) return ReportIncomplete();
1792         cursor_ += len;
1793         break;
1794 
1795       case 'o':
1796         operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
1797         if (len > bytes_left) return ReportIncomplete();
1798         cursor_ += len;
1799         break;
1800 
1801       case 's':
1802         operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
1803         if (len > bytes_left) return ReportIncomplete();
1804         cursor_ += len;
1805         break;
1806 
1807       case 'a':
1808         operands->offset =
1809           reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
1810                                       &len);
1811         if (len > bytes_left) return ReportIncomplete();
1812         cursor_ += len;
1813         break;
1814 
1815       case '1':
1816         if (1 > bytes_left) return ReportIncomplete();
1817         operands->offset = static_cast<unsigned char>(*cursor_++);
1818         break;
1819 
1820       case '2':
1821         if (2 > bytes_left) return ReportIncomplete();
1822         operands->offset = reader_->ReadTwoBytes(cursor_);
1823         cursor_ += 2;
1824         break;
1825 
1826       case '4':
1827         if (4 > bytes_left) return ReportIncomplete();
1828         operands->offset = reader_->ReadFourBytes(cursor_);
1829         cursor_ += 4;
1830         break;
1831 
1832       case '8':
1833         if (8 > bytes_left) return ReportIncomplete();
1834         operands->offset = reader_->ReadEightBytes(cursor_);
1835         cursor_ += 8;
1836         break;
1837 
1838       case 'e': {
1839         size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
1840         if (len > bytes_left || expression_length > bytes_left - len)
1841           return ReportIncomplete();
1842         cursor_ += len;
1843         operands->expression = string(reinterpret_cast<const char *>(cursor_),
1844                                       expression_length);
1845         cursor_ += expression_length;
1846         break;
1847       }
1848 
1849       default:
1850           assert(0);
1851     }
1852   }
1853 
1854   return true;
1855 }
1856 
DoInstruction()1857 bool CallFrameInfo::State::DoInstruction() {
1858   CIE *cie = entry_->cie;
1859   Operands ops;
1860 
1861   // Our entry's kind should have been set by now.
1862   assert(entry_->kind != kUnknown);
1863 
1864   // We shouldn't have been invoked unless there were more
1865   // instructions to parse.
1866   assert(cursor_ < entry_->end);
1867 
1868   unsigned opcode = *cursor_++;
1869   if ((opcode & 0xc0) != 0) {
1870     switch (opcode & 0xc0) {
1871       // Advance the address.
1872       case DW_CFA_advance_loc: {
1873         size_t code_offset = opcode & 0x3f;
1874         address_ += code_offset * cie->code_alignment_factor;
1875         break;
1876       }
1877 
1878       // Find a register at an offset from the CFA.
1879       case DW_CFA_offset:
1880         if (!ParseOperands("o", &ops) ||
1881             !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
1882           return false;
1883         break;
1884 
1885       // Restore the rule established for a register by the CIE.
1886       case DW_CFA_restore:
1887         if (!DoRestore(opcode & 0x3f)) return false;
1888         break;
1889 
1890       // The 'if' above should have excluded this possibility.
1891       default:
1892         assert(0);
1893     }
1894 
1895     // Return here, so the big switch below won't be indented.
1896     return true;
1897   }
1898 
1899   switch (opcode) {
1900     // Set the address.
1901     case DW_CFA_set_loc:
1902       if (!ParseOperands("a", &ops)) return false;
1903       address_ = ops.offset;
1904       break;
1905 
1906     // Advance the address.
1907     case DW_CFA_advance_loc1:
1908       if (!ParseOperands("1", &ops)) return false;
1909       address_ += ops.offset * cie->code_alignment_factor;
1910       break;
1911 
1912     // Advance the address.
1913     case DW_CFA_advance_loc2:
1914       if (!ParseOperands("2", &ops)) return false;
1915       address_ += ops.offset * cie->code_alignment_factor;
1916       break;
1917 
1918     // Advance the address.
1919     case DW_CFA_advance_loc4:
1920       if (!ParseOperands("4", &ops)) return false;
1921       address_ += ops.offset * cie->code_alignment_factor;
1922       break;
1923 
1924     // Advance the address.
1925     case DW_CFA_MIPS_advance_loc8:
1926       if (!ParseOperands("8", &ops)) return false;
1927       address_ += ops.offset * cie->code_alignment_factor;
1928       break;
1929 
1930     // Compute the CFA by adding an offset to a register.
1931     case DW_CFA_def_cfa:
1932       if (!ParseOperands("ro", &ops) ||
1933           !DoDefCFA(ops.register_number, ops.offset))
1934         return false;
1935       break;
1936 
1937     // Compute the CFA by adding an offset to a register.
1938     case DW_CFA_def_cfa_sf:
1939       if (!ParseOperands("rs", &ops) ||
1940           !DoDefCFA(ops.register_number,
1941                     ops.signed_offset * cie->data_alignment_factor))
1942         return false;
1943       break;
1944 
1945     // Change the base register used to compute the CFA.
1946     case DW_CFA_def_cfa_register: {
1947       if (!ParseOperands("r", &ops)) return false;
1948       Rule *cfa_rule = rules_.CFARule();
1949       if (!cfa_rule) {
1950         if (!DoDefCFA(ops.register_number, ops.offset)) {
1951           reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1952           return false;
1953         }
1954       } else {
1955         cfa_rule->SetBaseRegister(ops.register_number);
1956         if (!cfa_rule->Handle(handler_, address_,
1957                               Handler::kCFARegister))
1958         return false;
1959       }
1960       break;
1961     }
1962 
1963     // Change the offset used to compute the CFA.
1964     case DW_CFA_def_cfa_offset:
1965       if (!ParseOperands("o", &ops) ||
1966           !DoDefCFAOffset(ops.offset))
1967         return false;
1968       break;
1969 
1970     // Change the offset used to compute the CFA.
1971     case DW_CFA_def_cfa_offset_sf:
1972       if (!ParseOperands("s", &ops) ||
1973           !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
1974         return false;
1975       break;
1976 
1977     // Specify an expression whose value is the CFA.
1978     case DW_CFA_def_cfa_expression: {
1979       if (!ParseOperands("e", &ops))
1980         return false;
1981       Rule *rule = new ValExpressionRule(ops.expression);
1982       rules_.SetCFARule(rule);
1983       if (!rule->Handle(handler_, address_,
1984                         Handler::kCFARegister))
1985         return false;
1986       break;
1987     }
1988 
1989     // The register's value cannot be recovered.
1990     case DW_CFA_undefined: {
1991       if (!ParseOperands("r", &ops) ||
1992           !DoRule(ops.register_number, new UndefinedRule()))
1993         return false;
1994       break;
1995     }
1996 
1997     // The register's value is unchanged from its value in the caller.
1998     case DW_CFA_same_value: {
1999       if (!ParseOperands("r", &ops) ||
2000           !DoRule(ops.register_number, new SameValueRule()))
2001         return false;
2002       break;
2003     }
2004 
2005     // Find a register at an offset from the CFA.
2006     case DW_CFA_offset_extended:
2007       if (!ParseOperands("ro", &ops) ||
2008           !DoOffset(ops.register_number,
2009                     ops.offset * cie->data_alignment_factor))
2010         return false;
2011       break;
2012 
2013     // The register is saved at an offset from the CFA.
2014     case DW_CFA_offset_extended_sf:
2015       if (!ParseOperands("rs", &ops) ||
2016           !DoOffset(ops.register_number,
2017                     ops.signed_offset * cie->data_alignment_factor))
2018         return false;
2019       break;
2020 
2021     // The register is saved at an offset from the CFA.
2022     case DW_CFA_GNU_negative_offset_extended:
2023       if (!ParseOperands("ro", &ops) ||
2024           !DoOffset(ops.register_number,
2025                     -ops.offset * cie->data_alignment_factor))
2026         return false;
2027       break;
2028 
2029     // The register's value is the sum of the CFA plus an offset.
2030     case DW_CFA_val_offset:
2031       if (!ParseOperands("ro", &ops) ||
2032           !DoValOffset(ops.register_number,
2033                        ops.offset * cie->data_alignment_factor))
2034         return false;
2035       break;
2036 
2037     // The register's value is the sum of the CFA plus an offset.
2038     case DW_CFA_val_offset_sf:
2039       if (!ParseOperands("rs", &ops) ||
2040           !DoValOffset(ops.register_number,
2041                        ops.signed_offset * cie->data_alignment_factor))
2042         return false;
2043       break;
2044 
2045     // The register has been saved in another register.
2046     case DW_CFA_register: {
2047       if (!ParseOperands("ro", &ops) ||
2048           !DoRule(ops.register_number, new RegisterRule(ops.offset)))
2049         return false;
2050       break;
2051     }
2052 
2053     // An expression yields the address at which the register is saved.
2054     case DW_CFA_expression: {
2055       if (!ParseOperands("re", &ops) ||
2056           !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
2057         return false;
2058       break;
2059     }
2060 
2061     // An expression yields the caller's value for the register.
2062     case DW_CFA_val_expression: {
2063       if (!ParseOperands("re", &ops) ||
2064           !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
2065         return false;
2066       break;
2067     }
2068 
2069     // Restore the rule established for a register by the CIE.
2070     case DW_CFA_restore_extended:
2071       if (!ParseOperands("r", &ops) ||
2072           !DoRestore( ops.register_number))
2073         return false;
2074       break;
2075 
2076     // Save the current set of rules on a stack.
2077     case DW_CFA_remember_state:
2078       saved_rules_.push(rules_);
2079       break;
2080 
2081     // Pop the current set of rules off the stack.
2082     case DW_CFA_restore_state: {
2083       if (saved_rules_.empty()) {
2084         reporter_->EmptyStateStack(entry_->offset, entry_->kind,
2085                                    CursorOffset());
2086         return false;
2087       }
2088       const RuleMap &new_rules = saved_rules_.top();
2089       if (rules_.CFARule() && !new_rules.CFARule()) {
2090         reporter_->ClearingCFARule(entry_->offset, entry_->kind,
2091                                    CursorOffset());
2092         return false;
2093       }
2094       rules_.HandleTransitionTo(handler_, address_, new_rules);
2095       rules_ = new_rules;
2096       saved_rules_.pop();
2097       break;
2098     }
2099 
2100     // No operation.  (Padding instruction.)
2101     case DW_CFA_nop:
2102       break;
2103 
2104     // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
2105     // are saved in registers 24 through 31 (%i0-%i7), and registers
2106     // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
2107     // (0-15 * the register size). The register numbers must be
2108     // hard-coded. A GNU extension, and not a pretty one.
2109     case DW_CFA_GNU_window_save: {
2110       // Save %o0-%o7 in %i0-%i7.
2111       for (int i = 8; i < 16; i++)
2112         if (!DoRule(i, new RegisterRule(i + 16)))
2113           return false;
2114       // Save %l0-%l7 and %i0-%i7 at the CFA.
2115       for (int i = 16; i < 32; i++)
2116         // Assume that the byte reader's address size is the same as
2117         // the architecture's register size. !@#%*^ hilarious.
2118         if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
2119                                       (i - 16) * reader_->AddressSize())))
2120           return false;
2121       break;
2122     }
2123 
2124     // I'm not sure what this is. GDB doesn't use it for unwinding.
2125     case DW_CFA_GNU_args_size:
2126       if (!ParseOperands("o", &ops)) return false;
2127       break;
2128 
2129     // An opcode we don't recognize.
2130     default: {
2131       reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
2132       return false;
2133     }
2134   }
2135 
2136   return true;
2137 }
2138 
DoDefCFA(unsigned base_register,long offset)2139 bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
2140   Rule *rule = new ValOffsetRule(base_register, offset);
2141   rules_.SetCFARule(rule);
2142   return rule->Handle(handler_, address_,
2143                       Handler::kCFARegister);
2144 }
2145 
DoDefCFAOffset(long offset)2146 bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
2147   Rule *cfa_rule = rules_.CFARule();
2148   if (!cfa_rule) {
2149     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2150     return false;
2151   }
2152   cfa_rule->SetOffset(offset);
2153   return cfa_rule->Handle(handler_, address_,
2154                           Handler::kCFARegister);
2155 }
2156 
DoRule(unsigned reg,Rule * rule)2157 bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) {
2158   rules_.SetRegisterRule(reg, rule);
2159   return rule->Handle(handler_, address_, reg);
2160 }
2161 
DoOffset(unsigned reg,long offset)2162 bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
2163   if (!rules_.CFARule()) {
2164     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2165     return false;
2166   }
2167   return DoRule(reg,
2168                 new OffsetRule(Handler::kCFARegister, offset));
2169 }
2170 
DoValOffset(unsigned reg,long offset)2171 bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
2172   if (!rules_.CFARule()) {
2173     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2174     return false;
2175   }
2176   return DoRule(reg,
2177                 new ValOffsetRule(Handler::kCFARegister, offset));
2178 }
2179 
DoRestore(unsigned reg)2180 bool CallFrameInfo::State::DoRestore(unsigned reg) {
2181   // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
2182   if (entry_->kind == kCIE) {
2183     reporter_->RestoreInCIE(entry_->offset, CursorOffset());
2184     return false;
2185   }
2186   Rule *rule = cie_rules_.RegisterRule(reg);
2187   if (!rule) {
2188     // This isn't really the right thing to do, but since CFI generally
2189     // only mentions callee-saves registers, and GCC's convention for
2190     // callee-saves registers is that they are unchanged, it's a good
2191     // approximation.
2192     rule = new SameValueRule();
2193   }
2194   return DoRule(reg, rule);
2195 }
2196 
ReadEntryPrologue(const uint8_t * cursor,Entry * entry)2197 bool CallFrameInfo::ReadEntryPrologue(const uint8_t *cursor, Entry *entry) {
2198   const uint8_t *buffer_end = buffer_ + buffer_length_;
2199 
2200   // Initialize enough of ENTRY for use in error reporting.
2201   entry->offset = cursor - buffer_;
2202   entry->start = cursor;
2203   entry->kind = kUnknown;
2204   entry->end = NULL;
2205 
2206   // Read the initial length. This sets reader_'s offset size.
2207   size_t length_size;
2208   uint64 length = reader_->ReadInitialLength(cursor, &length_size);
2209   if (length_size > size_t(buffer_end - cursor))
2210     return ReportIncomplete(entry);
2211   cursor += length_size;
2212 
2213   // In a .eh_frame section, a length of zero marks the end of the series
2214   // of entries.
2215   if (length == 0 && eh_frame_) {
2216     entry->kind = kTerminator;
2217     entry->end = cursor;
2218     return true;
2219   }
2220 
2221   // Validate the length.
2222   if (length > size_t(buffer_end - cursor))
2223     return ReportIncomplete(entry);
2224 
2225   // The length is the number of bytes after the initial length field;
2226   // we have that position handy at this point, so compute the end
2227   // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
2228   // and the length didn't fit in a size_t, we would have rejected it
2229   // above.)
2230   entry->end = cursor + length;
2231 
2232   // Parse the next field: either the offset of a CIE or a CIE id.
2233   size_t offset_size = reader_->OffsetSize();
2234   if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
2235   entry->id = reader_->ReadOffset(cursor);
2236 
2237   // Don't advance cursor past id field yet; in .eh_frame data we need
2238   // the id's position to compute the section offset of an FDE's CIE.
2239 
2240   // Now we can decide what kind of entry this is.
2241   if (eh_frame_) {
2242     // In .eh_frame data, an ID of zero marks the entry as a CIE, and
2243     // anything else is an offset from the id field of the FDE to the start
2244     // of the CIE.
2245     if (entry->id == 0) {
2246       entry->kind = kCIE;
2247     } else {
2248       entry->kind = kFDE;
2249       // Turn the offset from the id into an offset from the buffer's start.
2250       entry->id = (cursor - buffer_) - entry->id;
2251     }
2252   } else {
2253     // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
2254     // offset size for the entry) marks the entry as a CIE, and anything
2255     // else is the offset of the CIE from the beginning of the section.
2256     if (offset_size == 4)
2257       entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
2258     else {
2259       assert(offset_size == 8);
2260       entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
2261     }
2262   }
2263 
2264   // Now advance cursor past the id.
2265    cursor += offset_size;
2266 
2267   // The fields specific to this kind of entry start here.
2268   entry->fields = cursor;
2269 
2270   entry->cie = NULL;
2271 
2272   return true;
2273 }
2274 
ReadCIEFields(CIE * cie)2275 bool CallFrameInfo::ReadCIEFields(CIE *cie) {
2276   const uint8_t *cursor = cie->fields;
2277   size_t len;
2278 
2279   assert(cie->kind == kCIE);
2280 
2281   // Prepare for early exit.
2282   cie->version = 0;
2283   cie->augmentation.clear();
2284   cie->code_alignment_factor = 0;
2285   cie->data_alignment_factor = 0;
2286   cie->return_address_register = 0;
2287   cie->has_z_augmentation = false;
2288   cie->pointer_encoding = DW_EH_PE_absptr;
2289   cie->instructions = 0;
2290 
2291   // Parse the version number.
2292   if (cie->end - cursor < 1)
2293     return ReportIncomplete(cie);
2294   cie->version = reader_->ReadOneByte(cursor);
2295   cursor++;
2296 
2297   // If we don't recognize the version, we can't parse any more fields of the
2298   // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
2299   // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
2300   // the difference between those versions seems to be the same as for
2301   // .debug_frame.
2302   if (cie->version < 1 || cie->version > 4) {
2303     reporter_->UnrecognizedVersion(cie->offset, cie->version);
2304     return false;
2305   }
2306 
2307   const uint8_t *augmentation_start = cursor;
2308   const uint8_t *augmentation_end =
2309       reinterpret_cast<const uint8_t *>(memchr(augmentation_start, '\0',
2310                                                cie->end - augmentation_start));
2311   if (! augmentation_end) return ReportIncomplete(cie);
2312   cursor = augmentation_end;
2313   cie->augmentation = string(reinterpret_cast<const char *>(augmentation_start),
2314                              cursor - augmentation_start);
2315   // Skip the terminating '\0'.
2316   cursor++;
2317 
2318   // Is this CFI augmented?
2319   if (!cie->augmentation.empty()) {
2320     // Is it an augmentation we recognize?
2321     if (cie->augmentation[0] == DW_Z_augmentation_start) {
2322       // Linux C++ ABI 'z' augmentation, used for exception handling data.
2323       cie->has_z_augmentation = true;
2324     } else {
2325       // Not an augmentation we recognize. Augmentations can have arbitrary
2326       // effects on the form of rest of the content, so we have to give up.
2327       reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2328       return false;
2329     }
2330   }
2331 
2332   if (cie->version >= 4) {
2333     cie->address_size = *cursor++;
2334     if (cie->address_size != 8 && cie->address_size != 4) {
2335       reporter_->UnexpectedAddressSize(cie->offset, cie->address_size);
2336       return false;
2337     }
2338 
2339     cie->segment_size = *cursor++;
2340     if (cie->segment_size != 0) {
2341       reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size);
2342       return false;
2343     }
2344   }
2345 
2346   // Parse the code alignment factor.
2347   cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
2348   if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2349   cursor += len;
2350 
2351   // Parse the data alignment factor.
2352   cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
2353   if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2354   cursor += len;
2355 
2356   // Parse the return address register. This is a ubyte in version 1, and
2357   // a ULEB128 in version 3.
2358   if (cie->version == 1) {
2359     if (cursor >= cie->end) return ReportIncomplete(cie);
2360     cie->return_address_register = uint8(*cursor++);
2361   } else {
2362     cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
2363     if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2364     cursor += len;
2365   }
2366 
2367   // If we have a 'z' augmentation string, find the augmentation data and
2368   // use the augmentation string to parse it.
2369   if (cie->has_z_augmentation) {
2370     uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
2371     if (size_t(cie->end - cursor) < len + data_size)
2372       return ReportIncomplete(cie);
2373     cursor += len;
2374     const uint8_t *data = cursor;
2375     cursor += data_size;
2376     const uint8_t *data_end = cursor;
2377 
2378     cie->has_z_lsda = false;
2379     cie->has_z_personality = false;
2380     cie->has_z_signal_frame = false;
2381 
2382     // Walk the augmentation string, and extract values from the
2383     // augmentation data as the string directs.
2384     for (size_t i = 1; i < cie->augmentation.size(); i++) {
2385       switch (cie->augmentation[i]) {
2386         case DW_Z_has_LSDA:
2387           // The CIE's augmentation data holds the language-specific data
2388           // area pointer's encoding, and the FDE's augmentation data holds
2389           // the pointer itself.
2390           cie->has_z_lsda = true;
2391           // Fetch the LSDA encoding from the augmentation data.
2392           if (data >= data_end) return ReportIncomplete(cie);
2393           cie->lsda_encoding = DwarfPointerEncoding(*data++);
2394           if (!reader_->ValidEncoding(cie->lsda_encoding)) {
2395             reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
2396             return false;
2397           }
2398           // Don't check if the encoding is usable here --- we haven't
2399           // read the FDE's fields yet, so we're not prepared for
2400           // DW_EH_PE_funcrel, although that's a fine encoding for the
2401           // LSDA to use, since it appears in the FDE.
2402           break;
2403 
2404         case DW_Z_has_personality_routine:
2405           // The CIE's augmentation data holds the personality routine
2406           // pointer's encoding, followed by the pointer itself.
2407           cie->has_z_personality = true;
2408           // Fetch the personality routine pointer's encoding from the
2409           // augmentation data.
2410           if (data >= data_end) return ReportIncomplete(cie);
2411           cie->personality_encoding = DwarfPointerEncoding(*data++);
2412           if (!reader_->ValidEncoding(cie->personality_encoding)) {
2413             reporter_->InvalidPointerEncoding(cie->offset,
2414                                               cie->personality_encoding);
2415             return false;
2416           }
2417           if (!reader_->UsableEncoding(cie->personality_encoding)) {
2418             reporter_->UnusablePointerEncoding(cie->offset,
2419                                                cie->personality_encoding);
2420             return false;
2421           }
2422           // Fetch the personality routine's pointer itself from the data.
2423           cie->personality_address =
2424             reader_->ReadEncodedPointer(data, cie->personality_encoding,
2425                                         &len);
2426           if (len > size_t(data_end - data))
2427             return ReportIncomplete(cie);
2428           data += len;
2429           break;
2430 
2431         case DW_Z_has_FDE_address_encoding:
2432           // The CIE's augmentation data holds the pointer encoding to use
2433           // for addresses in the FDE.
2434           if (data >= data_end) return ReportIncomplete(cie);
2435           cie->pointer_encoding = DwarfPointerEncoding(*data++);
2436           if (!reader_->ValidEncoding(cie->pointer_encoding)) {
2437             reporter_->InvalidPointerEncoding(cie->offset,
2438                                               cie->pointer_encoding);
2439             return false;
2440           }
2441           if (!reader_->UsableEncoding(cie->pointer_encoding)) {
2442             reporter_->UnusablePointerEncoding(cie->offset,
2443                                                cie->pointer_encoding);
2444             return false;
2445           }
2446           break;
2447 
2448         case DW_Z_is_signal_trampoline:
2449           // Frames using this CIE are signal delivery frames.
2450           cie->has_z_signal_frame = true;
2451           break;
2452 
2453         default:
2454           // An augmentation we don't recognize.
2455           reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2456           return false;
2457       }
2458     }
2459   }
2460 
2461   // The CIE's instructions start here.
2462   cie->instructions = cursor;
2463 
2464   return true;
2465 }
2466 
ReadFDEFields(FDE * fde)2467 bool CallFrameInfo::ReadFDEFields(FDE *fde) {
2468   const uint8_t *cursor = fde->fields;
2469   size_t size;
2470 
2471   fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
2472                                              &size);
2473   if (size > size_t(fde->end - cursor))
2474     return ReportIncomplete(fde);
2475   cursor += size;
2476   reader_->SetFunctionBase(fde->address);
2477 
2478   // For the length, we strip off the upper nybble of the encoding used for
2479   // the starting address.
2480   DwarfPointerEncoding length_encoding =
2481     DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
2482   fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
2483   if (size > size_t(fde->end - cursor))
2484     return ReportIncomplete(fde);
2485   cursor += size;
2486 
2487   // If the CIE has a 'z' augmentation string, then augmentation data
2488   // appears here.
2489   if (fde->cie->has_z_augmentation) {
2490     uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
2491     if (size_t(fde->end - cursor) < size + data_size)
2492       return ReportIncomplete(fde);
2493     cursor += size;
2494 
2495     // In the abstract, we should walk the augmentation string, and extract
2496     // items from the FDE's augmentation data as we encounter augmentation
2497     // string characters that specify their presence: the ordering of items
2498     // in the augmentation string determines the arrangement of values in
2499     // the augmentation data.
2500     //
2501     // In practice, there's only ever one value in FDE augmentation data
2502     // that we support --- the LSDA pointer --- and we have to bail if we
2503     // see any unrecognized augmentation string characters. So if there is
2504     // anything here at all, we know what it is, and where it starts.
2505     if (fde->cie->has_z_lsda) {
2506       // Check whether the LSDA's pointer encoding is usable now: only once
2507       // we've parsed the FDE's starting address do we call reader_->
2508       // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
2509       // usable.
2510       if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
2511         reporter_->UnusablePointerEncoding(fde->cie->offset,
2512                                            fde->cie->lsda_encoding);
2513         return false;
2514       }
2515 
2516       fde->lsda_address =
2517         reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
2518       if (size > data_size)
2519         return ReportIncomplete(fde);
2520       // Ideally, we would also complain here if there were unconsumed
2521       // augmentation data.
2522     }
2523 
2524     cursor += data_size;
2525   }
2526 
2527   // The FDE's instructions start after those.
2528   fde->instructions = cursor;
2529 
2530   return true;
2531 }
2532 
Start()2533 bool CallFrameInfo::Start() {
2534   const uint8_t *buffer_end = buffer_ + buffer_length_;
2535   const uint8_t *cursor;
2536   bool all_ok = true;
2537   const uint8_t *entry_end;
2538   bool ok;
2539 
2540   // Traverse all the entries in buffer_, skipping CIEs and offering
2541   // FDEs to the handler.
2542   for (cursor = buffer_; cursor < buffer_end;
2543        cursor = entry_end, all_ok = all_ok && ok) {
2544     FDE fde;
2545 
2546     // Make it easy to skip this entry with 'continue': assume that
2547     // things are not okay until we've checked all the data, and
2548     // prepare the address of the next entry.
2549     ok = false;
2550 
2551     // Read the entry's prologue.
2552     if (!ReadEntryPrologue(cursor, &fde)) {
2553       if (!fde.end) {
2554         // If we couldn't even figure out this entry's extent, then we
2555         // must stop processing entries altogether.
2556         all_ok = false;
2557         break;
2558       }
2559       entry_end = fde.end;
2560       continue;
2561     }
2562 
2563     // The next iteration picks up after this entry.
2564     entry_end = fde.end;
2565 
2566     // Did we see an .eh_frame terminating mark?
2567     if (fde.kind == kTerminator) {
2568       // If there appears to be more data left in the section after the
2569       // terminating mark, warn the user. But this is just a warning;
2570       // we leave all_ok true.
2571       if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
2572       break;
2573     }
2574 
2575     // In this loop, we skip CIEs. We only parse them fully when we
2576     // parse an FDE that refers to them. This limits our memory
2577     // consumption (beyond the buffer itself) to that needed to
2578     // process the largest single entry.
2579     if (fde.kind != kFDE) {
2580       ok = true;
2581       continue;
2582     }
2583 
2584     // Validate the CIE pointer.
2585     if (fde.id > buffer_length_) {
2586       reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
2587       continue;
2588     }
2589 
2590     CIE cie;
2591 
2592     // Parse this FDE's CIE header.
2593     if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
2594       continue;
2595     // This had better be an actual CIE.
2596     if (cie.kind != kCIE) {
2597       reporter_->BadCIEId(fde.offset, fde.id);
2598       continue;
2599     }
2600     if (!ReadCIEFields(&cie))
2601       continue;
2602 
2603     // TODO(nbilling): This could lead to strange behavior if a single buffer
2604     // contained a mixture of DWARF versions as well as address sizes. Not
2605     // sure if it's worth handling such a case.
2606 
2607     // DWARF4 CIE specifies address_size, so use it for this call frame.
2608     if (cie.version >= 4) {
2609       reader_->SetAddressSize(cie.address_size);
2610     }
2611 
2612     // We now have the values that govern both the CIE and the FDE.
2613     cie.cie = &cie;
2614     fde.cie = &cie;
2615 
2616     // Parse the FDE's header.
2617     if (!ReadFDEFields(&fde))
2618       continue;
2619 
2620     // Call Entry to ask the consumer if they're interested.
2621     if (!handler_->Entry(fde.offset, fde.address, fde.size,
2622                          cie.version, cie.augmentation,
2623                          cie.return_address_register)) {
2624       // The handler isn't interested in this entry. That's not an error.
2625       ok = true;
2626       continue;
2627     }
2628 
2629     if (cie.has_z_augmentation) {
2630       // Report the personality routine address, if we have one.
2631       if (cie.has_z_personality) {
2632         if (!handler_
2633             ->PersonalityRoutine(cie.personality_address,
2634                                  IsIndirectEncoding(cie.personality_encoding)))
2635           continue;
2636       }
2637 
2638       // Report the language-specific data area address, if we have one.
2639       if (cie.has_z_lsda) {
2640         if (!handler_
2641             ->LanguageSpecificDataArea(fde.lsda_address,
2642                                        IsIndirectEncoding(cie.lsda_encoding)))
2643           continue;
2644       }
2645 
2646       // If this is a signal-handling frame, report that.
2647       if (cie.has_z_signal_frame) {
2648         if (!handler_->SignalHandler())
2649           continue;
2650       }
2651     }
2652 
2653     // Interpret the CIE's instructions, and then the FDE's instructions.
2654     State state(reader_, handler_, reporter_, fde.address);
2655     ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
2656 
2657     // Tell the ByteReader that the function start address from the
2658     // FDE header is no longer valid.
2659     reader_->ClearFunctionBase();
2660 
2661     // Report the end of the entry.
2662     handler_->End();
2663   }
2664 
2665   return all_ok;
2666 }
2667 
KindName(EntryKind kind)2668 const char *CallFrameInfo::KindName(EntryKind kind) {
2669   if (kind == CallFrameInfo::kUnknown)
2670     return "entry";
2671   else if (kind == CallFrameInfo::kCIE)
2672     return "common information entry";
2673   else if (kind == CallFrameInfo::kFDE)
2674     return "frame description entry";
2675   else {
2676     assert (kind == CallFrameInfo::kTerminator);
2677     return ".eh_frame sequence terminator";
2678   }
2679 }
2680 
ReportIncomplete(Entry * entry)2681 bool CallFrameInfo::ReportIncomplete(Entry *entry) {
2682   reporter_->Incomplete(entry->offset, entry->kind);
2683   return false;
2684 }
2685 
Incomplete(uint64 offset,CallFrameInfo::EntryKind kind)2686 void CallFrameInfo::Reporter::Incomplete(uint64 offset,
2687                                          CallFrameInfo::EntryKind kind) {
2688   fprintf(stderr,
2689           "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n",
2690           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2691           section_.c_str());
2692 }
2693 
EarlyEHTerminator(uint64 offset)2694 void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
2695   fprintf(stderr,
2696           "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker"
2697           " before end of section contents\n",
2698           filename_.c_str(), offset, section_.c_str());
2699 }
2700 
CIEPointerOutOfRange(uint64 offset,uint64 cie_offset)2701 void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
2702                                                    uint64 cie_offset) {
2703   fprintf(stderr,
2704           "%s: CFI frame description entry at offset 0x%llx in '%s':"
2705           " CIE pointer is out of range: 0x%llx\n",
2706           filename_.c_str(), offset, section_.c_str(), cie_offset);
2707 }
2708 
BadCIEId(uint64 offset,uint64 cie_offset)2709 void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
2710   fprintf(stderr,
2711           "%s: CFI frame description entry at offset 0x%llx in '%s':"
2712           " CIE pointer does not point to a CIE: 0x%llx\n",
2713           filename_.c_str(), offset, section_.c_str(), cie_offset);
2714 }
2715 
UnexpectedAddressSize(uint64 offset,uint8_t address_size)2716 void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64 offset,
2717                                                     uint8_t address_size) {
2718   fprintf(stderr,
2719           "%s: CFI frame description entry at offset 0x%llx in '%s':"
2720           " CIE specifies unexpected address size: %d\n",
2721           filename_.c_str(), offset, section_.c_str(), address_size);
2722 }
2723 
UnexpectedSegmentSize(uint64 offset,uint8_t segment_size)2724 void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64 offset,
2725                                                     uint8_t segment_size) {
2726   fprintf(stderr,
2727           "%s: CFI frame description entry at offset 0x%llx in '%s':"
2728           " CIE specifies unexpected segment size: %d\n",
2729           filename_.c_str(), offset, section_.c_str(), segment_size);
2730 }
2731 
UnrecognizedVersion(uint64 offset,int version)2732 void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
2733   fprintf(stderr,
2734           "%s: CFI frame description entry at offset 0x%llx in '%s':"
2735           " CIE specifies unrecognized version: %d\n",
2736           filename_.c_str(), offset, section_.c_str(), version);
2737 }
2738 
UnrecognizedAugmentation(uint64 offset,const string & aug)2739 void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
2740                                                        const string &aug) {
2741   fprintf(stderr,
2742           "%s: CFI frame description entry at offset 0x%llx in '%s':"
2743           " CIE specifies unrecognized augmentation: '%s'\n",
2744           filename_.c_str(), offset, section_.c_str(), aug.c_str());
2745 }
2746 
InvalidPointerEncoding(uint64 offset,uint8 encoding)2747 void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
2748                                                      uint8 encoding) {
2749   fprintf(stderr,
2750           "%s: CFI common information entry at offset 0x%llx in '%s':"
2751           " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
2752           filename_.c_str(), offset, section_.c_str(), encoding);
2753 }
2754 
UnusablePointerEncoding(uint64 offset,uint8 encoding)2755 void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
2756                                                       uint8 encoding) {
2757   fprintf(stderr,
2758           "%s: CFI common information entry at offset 0x%llx in '%s':"
2759           " 'z' augmentation specifies a pointer encoding for which"
2760           " we have no base address: 0x%02x\n",
2761           filename_.c_str(), offset, section_.c_str(), encoding);
2762 }
2763 
RestoreInCIE(uint64 offset,uint64 insn_offset)2764 void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
2765   fprintf(stderr,
2766           "%s: CFI common information entry at offset 0x%llx in '%s':"
2767           " the DW_CFA_restore instruction at offset 0x%llx"
2768           " cannot be used in a common information entry\n",
2769           filename_.c_str(), offset, section_.c_str(), insn_offset);
2770 }
2771 
BadInstruction(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2772 void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
2773                                              CallFrameInfo::EntryKind kind,
2774                                              uint64 insn_offset) {
2775   fprintf(stderr,
2776           "%s: CFI %s at offset 0x%llx in section '%s':"
2777           " the instruction at offset 0x%llx is unrecognized\n",
2778           filename_.c_str(), CallFrameInfo::KindName(kind),
2779           offset, section_.c_str(), insn_offset);
2780 }
2781 
NoCFARule(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2782 void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
2783                                         CallFrameInfo::EntryKind kind,
2784                                         uint64 insn_offset) {
2785   fprintf(stderr,
2786           "%s: CFI %s at offset 0x%llx in section '%s':"
2787           " the instruction at offset 0x%llx assumes that a CFA rule has"
2788           " been set, but none has been set\n",
2789           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2790           section_.c_str(), insn_offset);
2791 }
2792 
EmptyStateStack(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2793 void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
2794                                               CallFrameInfo::EntryKind kind,
2795                                               uint64 insn_offset) {
2796   fprintf(stderr,
2797           "%s: CFI %s at offset 0x%llx in section '%s':"
2798           " the DW_CFA_restore_state instruction at offset 0x%llx"
2799           " should pop a saved state from the stack, but the stack is empty\n",
2800           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2801           section_.c_str(), insn_offset);
2802 }
2803 
ClearingCFARule(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2804 void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
2805                                               CallFrameInfo::EntryKind kind,
2806                                               uint64 insn_offset) {
2807   fprintf(stderr,
2808           "%s: CFI %s at offset 0x%llx in section '%s':"
2809           " the DW_CFA_restore_state instruction at offset 0x%llx"
2810           " would clear the CFA rule in effect\n",
2811           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2812           section_.c_str(), insn_offset);
2813 }
2814 
2815 }  // namespace dwarf2reader
2816