1 // Copyright 2016 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <assert.h>
16 #include <stdio.h>
17 
18 #include <algorithm>
19 #include <initializer_list>
20 #include <iostream>
21 #include <memory>
22 #include <stack>
23 #include <unordered_map>
24 #include <unordered_set>
25 #include <vector>
26 
27 #include "absl/base/attributes.h"
28 #include "absl/base/macros.h"
29 #include "absl/strings/string_view.h"
30 #include "absl/strings/substitute.h"
31 #include "absl/types/optional.h"
32 #include "bloaty.h"
33 #include "bloaty.pb.h"
34 #include "dwarf_constants.h"
35 #include "re2/re2.h"
36 
37 using namespace dwarf2reader;
38 using absl::string_view;
39 
// Rounds |offset| up to the next multiple of |granularity|.
// The rounding is done with a bit mask, so |granularity| must be a power of
// two; other values give meaningless results.
static size_t AlignUpTo(size_t offset, size_t granularity) {
  const size_t mask = granularity - 1;
  return (offset + mask) & ~mask;
}
44 
45 ABSL_ATTRIBUTE_NORETURN
Throw(const char * str,int line)46 static void Throw(const char *str, int line) {
47   throw bloaty::Error(str, __FILE__, line);
48 }
49 
50 #define THROW(msg) Throw(msg, __LINE__)
51 #define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
52 
53 namespace bloaty {
54 
55 extern int verbose_level;
56 
57 namespace dwarf {
58 
// Divides |n| by |d|, rounding the quotient up rather than truncating.
// This gives the ceiling for n >= 0 and d > 0.
int DivRoundUp(int n, int d) {
  const int bias = d - 1;
  return (n + bias) / d;
}
62 
63 
64 // Low-level Parsing Routines //////////////////////////////////////////////////
65 
66 // For parsing the low-level values found in DWARF files.  These are the only
67 // routines that touch the bytes of the input buffer directly.  Everything else
68 // is layered on top of these.
69 
70 template <class T>
ReadMemcpy(string_view * data)71 T ReadMemcpy(string_view* data) {
72   T ret;
73   if (data->size() < sizeof(T)) {
74     THROW("premature EOF reading fixed-length DWARF data");
75   }
76   memcpy(&ret, data->data(), sizeof(T));
77   data->remove_prefix(sizeof(T));
78   return ret;
79 }
80 
ReadPiece(size_t bytes,string_view * data)81 string_view ReadPiece(size_t bytes, string_view* data) {
82   if(data->size() < bytes) {
83     THROW("premature EOF reading variable-length DWARF data");
84   }
85   string_view ret = data->substr(0, bytes);
86   data->remove_prefix(bytes);
87   return ret;
88 }
89 
SkipBytes(size_t bytes,string_view * data)90 void SkipBytes(size_t bytes, string_view* data) {
91   if (data->size() < bytes) {
92     THROW("premature EOF skipping DWARF data");
93   }
94   data->remove_prefix(bytes);
95 }
96 
ReadNullTerminated(string_view * data)97 string_view ReadNullTerminated(string_view* data) {
98   const char* nullz =
99       static_cast<const char*>(memchr(data->data(), '\0', data->size()));
100 
101   // Return false if not NULL-terminated.
102   if (nullz == NULL) {
103     THROW("DWARF string was not NULL-terminated");
104   }
105 
106   size_t len = nullz - data->data();
107   string_view val = data->substr(0, len);
108   data->remove_prefix(len + 1);  // Remove NULL also.
109   return val;
110 }
111 
SkipNullTerminated(string_view * data)112 void SkipNullTerminated(string_view* data) {
113   const char* nullz =
114       static_cast<const char*>(memchr(data->data(), '\0', data->size()));
115 
116   // Return false if not NULL-terminated.
117   if (nullz == NULL) {
118     THROW("DWARF string was not NULL-terminated");
119   }
120 
121   size_t len = nullz - data->data();
122   data->remove_prefix(len + 1);  // Remove NULL also.
123 }
124 
125 // Parses the LEB128 format defined by DWARF (both signed and unsigned
126 // versions).
127 
ReadLEB128Internal(bool is_signed,string_view * data)128 uint64_t ReadLEB128Internal(bool is_signed, string_view* data) {
129   uint64_t ret = 0;
130   int shift = 0;
131   int maxshift = 70;
132   const char* ptr = data->data();
133   const char* limit = ptr + data->size();
134 
135   while (ptr < limit && shift < maxshift) {
136     char byte = *(ptr++);
137     ret |= static_cast<uint64_t>(byte & 0x7f) << shift;
138     shift += 7;
139     if ((byte & 0x80) == 0) {
140       data->remove_prefix(ptr - data->data());
141       if (is_signed && shift < 64 && (byte & 0x40)) {
142         ret |= -(1ULL << shift);
143       }
144       return ret;
145     }
146   }
147 
148   THROW("corrupt DWARF data, unterminated LEB128");
149 }
150 
151 template <typename T>
ReadLEB128(string_view * data)152 T ReadLEB128(string_view* data) {
153   typedef typename std::conditional<std::is_signed<T>::value, int64_t,
154                                     uint64_t>::type Int64Type;
155   Int64Type val = ReadLEB128Internal(std::is_signed<T>::value, data);
156   if (val > std::numeric_limits<T>::max() ||
157       val < std::numeric_limits<T>::min()) {
158     THROW("DWARF data contained larger LEB128 than we were expecting");
159   }
160   return static_cast<T>(val);
161 }
162 
SkipLEB128(string_view * data)163 void SkipLEB128(string_view* data) {
164   size_t limit =
165       std::min(static_cast<size_t>(data->size()), static_cast<size_t>(10));
166   for (size_t i = 0; i < limit; i++) {
167     if (((*data)[i] & 0x80) == 0) {
168       data->remove_prefix(i + 1);
169       return;
170     }
171   }
172 
173   THROW("corrupt DWARF data, unterminated LEB128");
174 }
175 
176 // Some size information attached to each compilation unit.  The size of an
177 // address or offset in the DWARF data depends on this state which is parsed
178 // from the header.
179 class CompilationUnitSizes {
180  public:
181   // When true, DWARF offsets are 64 bits, otherwise they are 32 bit.
dwarf64() const182   bool dwarf64() const { return dwarf64_; }
183 
184   // The size of addresses.  Guaranteed to be either 4 or 8.
address_size() const185   uint8_t address_size() const { return address_size_; }
186 
187   // DWARF version of this unit.
dwarf_version() const188   uint8_t dwarf_version() const { return dwarf_version_; }
189 
SetAddressSize(uint8_t address_size)190   void SetAddressSize(uint8_t address_size) {
191     if (address_size != 4 && address_size != 8) {
192       THROWF("Unexpected address size: $0", address_size);
193     }
194     address_size_ = address_size;
195   }
196 
197   // To allow this as the key in a map.
operator <(const CompilationUnitSizes & rhs) const198   bool operator<(const CompilationUnitSizes& rhs) const {
199     return std::tie(dwarf64_, address_size_) <
200            std::tie(rhs.dwarf64_, rhs.address_size_);
201   }
202 
203   // Reads a DWARF offset based on whether we are reading dwarf32 or dwarf64
204   // format.
ReadDWARFOffset(string_view * data) const205   uint64_t ReadDWARFOffset(string_view* data) const {
206     if (dwarf64_) {
207       return ReadMemcpy<uint64_t>(data);
208     } else {
209       return ReadMemcpy<uint32_t>(data);
210     }
211   }
212 
213   // Reads an address according to the expected address_size.
ReadAddress(string_view * data) const214   uint64_t ReadAddress(string_view* data) const {
215     if (address_size_ == 8) {
216       return ReadMemcpy<uint64_t>(data);
217     } else if (address_size_ == 4) {
218       return ReadMemcpy<uint32_t>(data);
219     } else {
220       BLOATY_UNREACHABLE();
221     }
222   }
223 
224   // Reads an "initial length" as specified in many DWARF headers.  This
225   // contains either a 32-bit or a 64-bit length, and signals whether we are
226   // using the 32-bit or 64-bit DWARF format (so it sets dwarf64 appropriately).
227   //
228   // Returns the range for this section and stores the remaining data
229   // in |remaining|.
ReadInitialLength(string_view * remaining)230   string_view ReadInitialLength(string_view* remaining) {
231     uint64_t len = ReadMemcpy<uint32_t>(remaining);
232 
233     if (len == 0xffffffff) {
234       dwarf64_ = true;
235       len = ReadMemcpy<uint64_t>(remaining);
236     } else {
237       dwarf64_ = false;
238     }
239 
240     if (remaining->size() < len) {
241       THROW("short DWARF compilation unit");
242     }
243 
244     string_view unit = *remaining;
245     unit.remove_suffix(remaining->size() - len);
246     *remaining = remaining->substr(len);
247     return unit;
248   }
249 
ReadDWARFVersion(string_view * data)250   void ReadDWARFVersion(string_view* data) {
251     dwarf_version_ = ReadMemcpy<uint16_t>(data);
252   }
253 
254  private:
255   uint16_t dwarf_version_;
256   bool dwarf64_;
257   uint8_t address_size_;
258 };
259 
260 
261 // AbbrevTable /////////////////////////////////////////////////////////////////
262 
263 // Parses and stores a representation of (a portion of) the .debug_abbrev
264 // section of a DWARF file.  An abbreviation is defined by a unique "code"
265 // (unique within one table), and defines the DIE tag and set of attributes.
266 // The encoding of the DIE then contains just the abbreviation code and the
267 // attribute values -- thanks to the abbreviation table, the tag and attribute
268 // keys/names are not required.
269 //
270 // The abbreviations are an internal detail of the DWARF format and users should
271 // not need to care about them.
272 
273 class AbbrevTable {
274  public:
275   // Reads abbreviations until a terminating abbreviation is seen.
276   string_view ReadAbbrevs(string_view data);
277 
278   // In a DWARF abbreviation, each attribute has a name and a form.
279   struct Attribute {
280     uint16_t name;
281     uint8_t form;
282   };
283 
284   // The representation of a single abbreviation.
285   struct Abbrev {
286     uint32_t code;
287     uint16_t tag;
288     bool has_child;
289     std::vector<Attribute> attr;
290   };
291 
IsEmpty() const292   bool IsEmpty() const { return abbrev_.empty(); }
293 
294   // Looks for an abbreviation with the given code.  Returns true if the lookup
295   // succeeded.
GetAbbrev(uint32_t code,const Abbrev ** abbrev) const296   bool GetAbbrev(uint32_t code, const Abbrev** abbrev) const {
297     auto it = abbrev_.find(code);
298     if (it != abbrev_.end()) {
299       *abbrev = &it->second;
300       return true;
301     } else {
302       return false;
303     }
304   }
305 
306  private:
307   // Keyed by abbreviation code.
308   // Generally we expect these to be small, so we could almost use a vector<>.
309   // But you never know what crazy input data is going to do...
310   std::unordered_map<uint32_t, Abbrev> abbrev_;
311 };
312 
ReadAbbrevs(string_view data)313 string_view AbbrevTable::ReadAbbrevs(string_view data) {
314   while (true) {
315     uint32_t code = ReadLEB128<uint32_t>(&data);
316 
317     if (code == 0) {
318       return data;  // Terminator entry.
319     }
320 
321     Abbrev& abbrev = abbrev_[code];
322 
323     if (abbrev.code) {
324       THROW("DWARF data contained duplicate abbrev code");
325     }
326 
327     uint8_t has_child;
328 
329     abbrev.code = code;
330     abbrev.tag = ReadLEB128<uint16_t>(&data);
331     has_child = ReadMemcpy<uint8_t>(&data);
332 
333     switch (has_child) {
334       case DW_children_yes:
335         abbrev.has_child = true;
336         break;
337       case DW_children_no:
338         abbrev.has_child = false;
339         break;
340       default:
341         THROW("DWARF has_child is neither true nor false.");
342     }
343 
344     while (true) {
345       Attribute attr;
346       attr.name = ReadLEB128<uint16_t>(&data);
347       attr.form = ReadLEB128<uint8_t>(&data);
348 
349       if (attr.name == 0 && attr.form == 0) {
350         break;  // End of this abbrev
351       }
352 
353       abbrev.attr.push_back(attr);
354     }
355   }
356 }
357 
358 
359 // StringTable /////////////////////////////////////////////////////////////////
360 
361 // Represents the .debug_str portion of a DWARF file and contains code for
362 // reading strings out of it.  This is an internal detail of the DWARF format
363 // and users should not need to care about it.
364 
365 class StringTable {
366  public:
367   // Construct with the debug_str data from a DWARF file.
StringTable(string_view debug_str)368   StringTable(string_view debug_str) : debug_str_(debug_str) {}
369 
370   // Read a string from the table.
371   string_view ReadEntry(size_t ofs) const;
372 
373  private:
374   string_view debug_str_;
375 };
376 
ReadEntry(size_t ofs) const377 string_view StringTable::ReadEntry(size_t ofs) const {
378   string_view str = debug_str_;
379   SkipBytes(ofs, &str);
380   return ReadNullTerminated(&str);
381 }
382 
383 
384 // AddressRanges ///////////////////////////////////////////////////////////////
385 
386 // Code for reading address ranges out of .debug_aranges.
387 
388 class AddressRanges {
389  public:
AddressRanges(string_view data)390   AddressRanges(string_view data) : section_(data), next_unit_(data) {}
391 
392   // Offset into .debug_info for the current compilation unit.
debug_info_offset()393   uint64_t debug_info_offset() { return debug_info_offset_; }
394 
395   // Address and length for this range.
address()396   uint64_t address() { return address_; }
length()397   uint64_t length() { return length_; }
398 
399   // Advance to the next range.  The values will be available in address() and
400   // length().  Returns false when the end of this compilation unit is hit.
401   // Must call this once before reading the first range.
402   bool NextRange();
403 
404   // Advance to the next compilation unit.  The unit offset will be available in
405   // debug_info_offset().  Must call this once before reading the first unit.
406   bool NextUnit();
407 
408  private:
409   CompilationUnitSizes sizes_;
410   string_view section_;
411   string_view unit_remaining_;
412   string_view next_unit_;
413   uint64_t debug_info_offset_;
414   uint64_t address_;
415   uint64_t length_;
416 };
417 
NextRange()418 bool AddressRanges::NextRange() {
419   if (unit_remaining_.empty()) {
420     return false;
421   }
422 
423   address_ = sizes_.ReadAddress(&unit_remaining_);
424   length_ = sizes_.ReadAddress(&unit_remaining_);
425   return true;
426 }
427 
NextUnit()428 bool AddressRanges::NextUnit() {
429   if (next_unit_.empty()) {
430     return false;
431   }
432 
433   unit_remaining_ = sizes_.ReadInitialLength(&next_unit_);
434   sizes_.ReadDWARFVersion(&unit_remaining_);
435 
436   if (sizes_.dwarf_version() > 4) {
437     THROW("DWARF data is too new for us");
438   }
439 
440   debug_info_offset_ = sizes_.ReadDWARFOffset(&unit_remaining_);
441 
442   uint8_t segment_size;
443 
444   sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&unit_remaining_));
445   segment_size = ReadMemcpy<uint8_t>(&unit_remaining_);
446 
447   if (segment_size) {
448     THROW("we don't know how to handle segmented addresses.");
449   }
450 
451   size_t ofs = unit_remaining_.data() - section_.data();
452   size_t aligned_ofs = AlignUpTo(ofs, sizes_.address_size() * 2);
453   SkipBytes(aligned_ofs - ofs, &unit_remaining_);
454   return true;
455 }
456 
457 
458 // LocationList ////////////////////////////////////////////////////////////////
459 
460 // Code for reading entries out of a location list.
461 // For the moment we only care about finding the bounds of a list given its
462 // offset, so we don't actually vend any of the data.
463 
464 class LocationList {
465  public:
LocationList(CompilationUnitSizes sizes,string_view data)466   LocationList(CompilationUnitSizes sizes, string_view data)
467       : sizes_(sizes), remaining_(data) {}
468 
read_offset() const469   const char* read_offset() const { return remaining_.data(); }
470   bool NextEntry();
471 
472  private:
473   CompilationUnitSizes sizes_;
474   string_view remaining_;
475 };
476 
NextEntry()477 bool LocationList::NextEntry() {
478   uint64_t start, end;
479   start = sizes_.ReadAddress(&remaining_);
480   end = sizes_.ReadAddress(&remaining_);
481   if (start == 0 && end == 0) {
482     return false;
483   } else if (start == UINT64_MAX ||
484              (start == UINT32_MAX && sizes_.address_size() == 4)) {
485     // Base address selection, nothing more to do.
486   } else {
487     // Need to skip the location description.
488     uint16_t length = ReadMemcpy<uint16_t>(&remaining_);
489     SkipBytes(length, &remaining_);
490   }
491   return true;
492 }
493 
GetLocationListRange(CompilationUnitSizes sizes,string_view available)494 string_view GetLocationListRange(CompilationUnitSizes sizes,
495                                  string_view available) {
496   LocationList list(sizes, available);
497   while (list.NextEntry()) {}
498   return available.substr(0, list.read_offset() - available.data());
499 }
500 
501 
502 // RangeList ///////////////////////////////////////////////////////////////////
503 
504 // Code for reading entries out of a range list.
505 // For the moment we only care about finding the bounds of a list given its
506 // offset, so we don't actually vend any of the data.
507 
508 class RangeList {
509  public:
RangeList(CompilationUnitSizes sizes,string_view data)510   RangeList(CompilationUnitSizes sizes, string_view data)
511       : sizes_(sizes), remaining_(data) {}
512 
read_offset() const513   const char* read_offset() const { return remaining_.data(); }
514   bool NextEntry();
515 
516  private:
517   CompilationUnitSizes sizes_;
518   string_view remaining_;
519 };
520 
NextEntry()521 bool RangeList::NextEntry() {
522   uint64_t start, end;
523   start = sizes_.ReadAddress(&remaining_);
524   end = sizes_.ReadAddress(&remaining_);
525   if (start == 0 && end == 0) {
526     return false;
527   }
528   return true;
529 }
530 
GetRangeListRange(CompilationUnitSizes sizes,string_view available)531 string_view GetRangeListRange(CompilationUnitSizes sizes,
532                               string_view available) {
533   RangeList list(sizes, available);
534   while (list.NextEntry()) {
535   }
536   return available.substr(0, list.read_offset() - available.data());
537 }
538 
539 // DIEReader ///////////////////////////////////////////////////////////////////
540 
541 // Reads a sequence of DWARF DIE's (Debugging Information Entries) from the
542 // .debug_info or .debug_types section of a binary.
543 //
544 // Each DIE contains a tag and a set of attribute/value pairs.  We rely on the
545 // abbreviations in an AbbrevTable to decode the DIEs.
546 
547 class DIEReader {
548  public:
549   // Constructs a new DIEReader.  Cannot be used until you call one of the
550   // Seek() methods below.
DIEReader(const File & file)551   DIEReader(const File& file) : dwarf_(file) {}
552 
553   // Returns true if we are at the end of DIEs for this compilation unit.
IsEof() const554   bool IsEof() const { return state_ == State::kEof; }
555 
556   // DIEs exist in both .debug_info and .debug_types.
557   enum class Section {
558     kDebugInfo,
559     kDebugTypes
560   };
561 
562   // Seeks to the overall start or the start of a specific compilation unit.
563   // Note that |header_offset| is the offset of the compilation unit *header*,
564   // not the offset of the first DIE.
565   bool SeekToCompilationUnit(Section section, uint64_t header_offset);
SeekToStart(Section section)566   bool SeekToStart(Section section) {
567     return SeekToCompilationUnit(section, 0);
568   }
569 
570   bool NextCompilationUnit();
571 
572   // Advances to the next overall DIE, ignoring whether it happens to be a
573   // child, a sibling, or an uncle/aunt.  Returns false at error or EOF.
574   bool NextDIE();
575 
576   // Skips children of the current DIE, so that the next call to NextDIE()
577   // will read the next sibling (or parent, if no sibling exists).
578   bool SkipChildren();
579 
GetAbbrev() const580   const AbbrevTable::Abbrev& GetAbbrev() const {
581     assert(!IsEof());
582     return *current_abbrev_;
583   }
584 
585   // Returns the tag of the current DIE.
586   // Requires that ReadCode() has been called at least once.
GetTag() const587   uint16_t GetTag() const { return GetAbbrev().tag; }
588 
589   // Returns whether the current DIE has a child.
590   // Requires that ReadCode() has been called at least once.
HasChild() const591   bool HasChild() const { return GetAbbrev().has_child; }
592 
dwarf() const593   const File& dwarf() const { return dwarf_; }
594 
unit_range() const595   string_view unit_range() const { return unit_range_; }
unit_sizes() const596   CompilationUnitSizes unit_sizes() const { return unit_sizes_; }
abbrev_version() const597   uint32_t abbrev_version() const { return abbrev_version_; }
debug_abbrev_offset() const598   uint64_t debug_abbrev_offset() const { return debug_abbrev_offset_; }
599 
600   // If both compileunit_name and strp_sink are set, this will automatically
601   // call strp_sink->AddFileRange(compileunit_name, <string range>) for every
602   // DW_FORM_strp attribute encountered.  These strings occur in the .debug_str
603   // section.
set_compileunit_name(absl::string_view name)604   void set_compileunit_name(absl::string_view name) {
605     unit_name_ = std::string(name);
606   }
set_strp_sink(RangeSink * sink)607   void set_strp_sink(RangeSink* sink) { strp_sink_ = sink; }
608 
AddIndirectString(string_view range) const609   void AddIndirectString(string_view range) const {
610     if (strp_sink_) {
611       strp_sink_->AddFileRange("dwarf_strp", unit_name_, range);
612     }
613   }
614 
615  private:
616   BLOATY_DISALLOW_COPY_AND_ASSIGN(DIEReader);
617 
618   template<typename> friend class AttrReader;
619 
620   // APIs for our friends to use to update our state.
621 
622   // Call to get the current read head where attributes should be parsed.
ReadAttributesBegin()623   string_view ReadAttributesBegin() {
624     assert(state_ == State::kReadyToReadAttributes);
625     return remaining_;
626   }
627 
628   // When some data has been parsed, this updates our read head.
ReadAttributesEnd(string_view remaining,uint64_t sibling)629   bool ReadAttributesEnd(string_view remaining, uint64_t sibling) {
630     assert(state_ == State::kReadyToReadAttributes);
631     if (remaining.data() == nullptr) {
632       THROW("premature EOF reading DWARF attributes");
633     } else {
634       remaining_ = remaining;
635       sibling_offset_ = sibling;
636       state_ = State::kReadyToNext;
637       return true;
638     }
639   }
640 
641   // Internal APIs.
642 
643   bool ReadCompilationUnitHeader();
644   bool ReadCode();
645 
646   enum class State {
647     kReadyToReadAttributes,
648     kReadyToNext,
649     kEof,
650   } state_;
651 
652   std::string error_;
653 
654   const File& dwarf_;
655   RangeSink* strp_sink_ = nullptr;
656 
657   // Abbreviation for the current entry.
658   const AbbrevTable::Abbrev* current_abbrev_;
659 
660   // Our current read position.
661   string_view remaining_;
662   uint64_t sibling_offset_;
663   int depth_ = 0;
664 
665   // Data for the next compilation unit.
666   string_view next_unit_;
667 
668   // All of the AbbrevTables we've read from .debug_abbrev, indexed by their
669   // offset within .debug_abbrev.
670   std::unordered_map<uint64_t, AbbrevTable> abbrev_tables_;
671 
672   // Whether we are in .debug_types or .debug_info.
673   Section section_;
674 
675   // Information about the current compilation unit.
676   uint64_t debug_abbrev_offset_;
677   std::string unit_name_;
678   string_view unit_range_;
679   CompilationUnitSizes unit_sizes_;
680   AbbrevTable* unit_abbrev_;
681 
682   // A small integer that uniquely identifies the combination of unit_abbrev_
683   // and unit_sizes_.  Attribute readers use this to know when they can reuse an
684   // existing (abbrev code) -> (Actions) mapping, since this table depends on
685   // both the current abbrev. table and the sizes.
686   uint32_t abbrev_version_;
687 
688   std::map<std::pair<AbbrevTable*, CompilationUnitSizes>, uint32_t>
689       abbrev_versions_;
690 
691   // Only for .debug_types
692   uint64_t unit_type_signature_;
693   uint64_t unit_type_offset_;
694 };
695 
ReadCode()696 bool DIEReader::ReadCode() {
697   uint32_t code;
698 again:
699   if (remaining_.empty()) {
700     state_ = State::kEof;
701     return false;
702   }
703   code = ReadLEB128<uint32_t>(&remaining_);
704   if (code == 0) {
705     // null entry terminates a chain of sibling entries.
706     depth_--;
707     goto again;
708   }
709 
710   if (!unit_abbrev_->GetAbbrev(code, &current_abbrev_)) {
711     THROW("couldn't find abbreviation for code");
712   }
713   state_ = State::kReadyToReadAttributes;
714   sibling_offset_ = 0;
715 
716   if (HasChild()) {
717     depth_++;
718   }
719 
720   return true;
721 }
722 
NextCompilationUnit()723 bool DIEReader::NextCompilationUnit() {
724   return ReadCompilationUnitHeader();
725 }
726 
NextDIE()727 bool DIEReader::NextDIE() {
728   if (state_ == State::kEof) {
729     return false;
730   }
731 
732   assert(state_ == State::kReadyToNext);
733   return ReadCode();
734 }
735 
SeekToCompilationUnit(Section section,uint64_t offset)736 bool DIEReader::SeekToCompilationUnit(Section section, uint64_t offset) {
737   section_ = section;
738 
739   if (section == Section::kDebugInfo) {
740     next_unit_ = dwarf_.debug_info;
741   } else {
742     next_unit_ = dwarf_.debug_types;
743   }
744 
745   SkipBytes(offset, &next_unit_);
746   return ReadCompilationUnitHeader();
747 }
748 
ReadCompilationUnitHeader()749 bool DIEReader::ReadCompilationUnitHeader() {
750   if (next_unit_.empty()) {
751     state_ = State::kEof;
752     return false;
753   }
754 
755   unit_range_ = next_unit_;
756   remaining_ = unit_sizes_.ReadInitialLength(&next_unit_);
757   unit_range_ = unit_range_.substr(
758       0, remaining_.size() + (remaining_.data() - unit_range_.data()));
759 
760   unit_sizes_.ReadDWARFVersion(&remaining_);
761 
762   if (unit_sizes_.dwarf_version() > 4) {
763     THROW("Data is in new DWARF format we don't understand");
764   }
765 
766   debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
767   unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset_];
768 
769   // If we haven't already read abbreviations for this debug_abbrev_offset_, we
770   // need to do so now.
771   if (unit_abbrev_->IsEmpty()) {
772     string_view abbrev_data = dwarf_.debug_abbrev;
773     SkipBytes(debug_abbrev_offset_, &abbrev_data);
774     unit_abbrev_->ReadAbbrevs(abbrev_data);
775   }
776 
777   unit_sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&remaining_));
778 
779   if (section_ == Section::kDebugTypes) {
780     unit_type_signature_ = ReadMemcpy<uint64_t>(&remaining_);
781     unit_type_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
782   }
783 
784   auto abbrev_id = std::make_pair(unit_abbrev_, unit_sizes_);
785   auto insert_pair = abbrev_versions_.insert(
786       std::make_pair(abbrev_id, abbrev_versions_.size()));
787 
788   // This will be either the newly inserted value or the existing one, if there
789   // was one.
790   abbrev_version_ = insert_pair.first->second;
791 
792   return ReadCode();
793 }
794 
795 
796 // DWARF form parsing //////////////////////////////////////////////////////////
797 
798 class AttrValue {
799  public:
AttrValue(uint64_t val)800   AttrValue(uint64_t val) : uint_(val), type_(Type::kUint) {}
AttrValue(string_view val)801   AttrValue(string_view val) : string_(val), type_(Type::kString) {}
802 
803   enum class Type {
804     kUint,
805     kString
806   };
807 
type() const808   Type type() const { return type_; }
IsUint() const809   bool IsUint() const { return type_ == Type::kUint; }
IsString() const810   bool IsString() const { return type_ == Type::kString; }
811 
ToUint() const812   absl::optional<uint64_t> ToUint() const {
813     if (IsUint()) return uint_;
814     string_view str = string_;
815     switch (str.size()) {
816       case 1:
817         return ReadMemcpy<uint8_t>(&str);
818       case 2:
819         return ReadMemcpy<uint8_t>(&str);
820       case 4:
821         return ReadMemcpy<uint32_t>(&str);
822       case 8:
823         return ReadMemcpy<uint64_t>(&str);
824     }
825     return absl::nullopt;
826   }
827 
GetUint() const828   uint64_t GetUint() const {
829     assert(type_ == Type::kUint);
830     return uint_;
831   }
832 
GetString() const833   string_view GetString() const {
834     assert(type_ == Type::kString);
835     return string_;
836   }
837 
838  private:
839   union {
840     uint64_t uint_;
841     string_view string_;
842   };
843 
844   Type type_;
845 };
846 
847 template <class D>
ReadBlock(string_view * data)848 string_view ReadBlock(string_view* data) {
849   D len = ReadMemcpy<D>(data);
850   return ReadPiece(len, data);
851 }
852 
ReadVariableBlock(string_view * data)853 string_view ReadVariableBlock(string_view* data) {
854   uint64_t len = ReadLEB128<uint64_t>(data);
855   return ReadPiece(len, data);
856 }
857 
858 template <class D>
ReadIndirectString(const DIEReader & reader,string_view * data)859 string_view ReadIndirectString(const DIEReader& reader, string_view* data) {
860   D ofs = ReadMemcpy<D>(data);
861   StringTable table(reader.dwarf().debug_str);
862   string_view ret = table.ReadEntry(ofs);
863   reader.AddIndirectString(ret);
864   return ret;
865 }
866 
ParseAttr(const DIEReader & reader,uint8_t form,string_view * data)867 AttrValue ParseAttr(const DIEReader& reader, uint8_t form, string_view* data) {
868   switch (form) {
869     case DW_FORM_indirect: {
870       uint16_t indirect_form = ReadLEB128<uint16_t>(data);
871       if (indirect_form == DW_FORM_indirect) {
872         THROW("indirect attribute has indirect form type");
873       }
874       return ParseAttr(reader, indirect_form, data);
875     }
876     case DW_FORM_ref1:
877       return AttrValue(ReadMemcpy<uint8_t>(data));
878     case DW_FORM_ref2:
879       return AttrValue(ReadMemcpy<uint16_t>(data));
880     case DW_FORM_ref4:
881       return AttrValue(ReadMemcpy<uint32_t>(data));
882     case DW_FORM_ref_sig8:
883     case DW_FORM_ref8:
884       return AttrValue(ReadMemcpy<uint64_t>(data));
885     case DW_FORM_ref_udata:
886       return AttrValue(ReadLEB128<uint64_t>(data));
887     case DW_FORM_addr:
888     address_size:
889       switch (reader.unit_sizes().address_size()) {
890         case 4:
891           return AttrValue(ReadMemcpy<uint32_t>(data));
892         case 8:
893           return AttrValue(ReadMemcpy<uint64_t>(data));
894         default:
895           BLOATY_UNREACHABLE();
896       }
897     case DW_FORM_ref_addr:
898       if (reader.unit_sizes().dwarf_version() <= 2) {
899         goto address_size;
900       }
901       ABSL_FALLTHROUGH_INTENDED;
902     case DW_FORM_sec_offset:
903       if (reader.unit_sizes().dwarf64()) {
904         return AttrValue(ReadMemcpy<uint64_t>(data));
905       } else {
906         return AttrValue(ReadMemcpy<uint32_t>(data));
907       }
908     case DW_FORM_udata:
909       return AttrValue(ReadLEB128<uint64_t>(data));
910     case DW_FORM_block1:
911       return AttrValue(ReadBlock<uint8_t>(data));
912     case DW_FORM_block2:
913       return AttrValue(ReadBlock<uint16_t>(data));
914     case DW_FORM_block4:
915       return AttrValue(ReadBlock<uint32_t>(data));
916     case DW_FORM_block:
917     case DW_FORM_exprloc:
918       return AttrValue(ReadVariableBlock(data));
919     case DW_FORM_string:
920       return AttrValue(ReadNullTerminated(data));
921     case DW_FORM_strp:
922       if (reader.unit_sizes().dwarf64()) {
923         return AttrValue(ReadIndirectString<uint64_t>(reader, data));
924       } else {
925         return AttrValue(ReadIndirectString<uint32_t>(reader, data));
926       }
927     case DW_FORM_data1:
928       return AttrValue(ReadPiece(1, data));
929     case DW_FORM_data2:
930       return AttrValue(ReadPiece(2, data));
931     case DW_FORM_data4:
932       return AttrValue(ReadPiece(4, data));
933     case DW_FORM_data8:
934       return AttrValue(ReadPiece(8, data));
935 
936     // Bloaty doesn't currently care about any bool or signed data.
937     // So we fudge it a bit and just stuff these in a uint64.
938     case DW_FORM_flag_present:
939       return AttrValue(1);
940     case DW_FORM_flag:
941       return AttrValue(ReadMemcpy<uint8_t>(data));
942     case DW_FORM_sdata:
943       return AttrValue(ReadLEB128<uint64_t>(data));
944     default:
945       THROWF("Don't know how to parse DWARF form: $0", form);
946   }
947 }
948 
949 
950 // AttrReader //////////////////////////////////////////////////////////////////
951 
952 // Parses a DIE's attributes, calling user callbacks with the parsed values.
953 
954 template <class T>
955 class AttrReader {
956  public:
957   typedef void CallbackFunc(T* container, AttrValue val);
958 
OnAttribute(DwarfAttribute attr,CallbackFunc * func)959   void OnAttribute(DwarfAttribute attr, CallbackFunc* func) {
960     attributes_[attr] = func;
961   }
962 
963   // Reads all attributes for this DIE, storing the ones we were expecting.
ReadAttributes(DIEReader * reader,T * container)964   void ReadAttributes(DIEReader* reader, T* container) {
965     string_view data = reader->ReadAttributesBegin();
966     const AbbrevTable::Abbrev& abbrev = reader->GetAbbrev();
967 
968     for (auto attr : abbrev.attr) {
969       AttrValue value = ParseAttr(*reader, attr.form, &data);
970       auto it = attributes_.find(attr.name);
971       if (it != attributes_.end()) {
972         it->second(container, value);
973       }
974     }
975 
976     reader->ReadAttributesEnd(data, 0);
977   }
978 
979  private:
980   std::unordered_map<int, CallbackFunc*> attributes_;
981 };
982 
983 // From DIEReader, defined here because it depends on FixedAttrReader.
SkipChildren()984 bool DIEReader::SkipChildren() {
985   assert(state_ == State::kReadyToNext);
986   if (!HasChild()) {
987     return true;
988   }
989 
990   int target_depth = depth_ - 1;
991   dwarf::AttrReader<void> attr_reader;
992   while (depth_ > target_depth) {
993     // TODO(haberman): use DW_AT_sibling to optimize skipping when it is
994     // available.
995     if (!NextDIE()) {
996       return false;
997     }
998     attr_reader.ReadAttributes(this, nullptr);
999   }
1000   return true;
1001 }
1002 
1003 // LineInfoReader //////////////////////////////////////////////////////////////
1004 
// Code to read the line number programs in the .debug_line section of a DWARF
// file.
1006 
1007 class LineInfoReader {
1008  public:
LineInfoReader(const File & file)1009   LineInfoReader(const File& file) : file_(file), info_(0) {}
1010 
1011   struct LineInfo {
LineInfobloaty::dwarf::LineInfoReader::LineInfo1012     LineInfo(bool default_is_stmt) : is_stmt(default_is_stmt) {}
1013     uint64_t address = 0;
1014     uint32_t file = 1;
1015     uint32_t line = 1;
1016     uint32_t column = 0;
1017     uint32_t discriminator = 0;
1018     bool end_sequence = false;
1019     bool basic_block = false;
1020     bool prologue_end = false;
1021     bool epilogue_begin = false;
1022     bool is_stmt;
1023     uint8_t op_index = 0;
1024     uint8_t isa = 0;
1025   };
1026 
1027   struct FileName {
1028     string_view name;
1029     uint32_t directory_index;
1030     uint64_t modified_time;
1031     uint64_t file_size;
1032   };
1033 
1034   void SeekToOffset(uint64_t offset, uint8_t address_size);
1035   bool ReadLineInfo();
lineinfo() const1036   const LineInfo& lineinfo() const { return info_; }
filename(size_t i) const1037   const FileName& filename(size_t i) const { return filenames_[i]; }
include_directory(size_t i) const1038   string_view include_directory(size_t i) const {
1039     return include_directories_[i];
1040   }
1041 
GetExpandedFilename(size_t index)1042   const std::string& GetExpandedFilename(size_t index) {
1043     if (index >= filenames_.size()) {
1044       THROW("filename index out of range");
1045     }
1046 
1047     // Generate these lazily.
1048     if (expanded_filenames_.size() <= index) {
1049       expanded_filenames_.resize(filenames_.size());
1050     }
1051 
1052     std::string& ret = expanded_filenames_[index];
1053     if (ret.empty()) {
1054       const FileName& filename = filenames_[index];
1055       string_view directory = include_directories_[filename.directory_index];
1056       ret = std::string(directory);
1057       if (!ret.empty()) {
1058         ret += "/";
1059       }
1060       ret += std::string(filename.name);
1061     }
1062     return ret;
1063   }
1064 
1065  private:
1066   struct Params {
1067     uint8_t minimum_instruction_length;
1068     uint8_t maximum_operations_per_instruction;
1069     uint8_t default_is_stmt;
1070     int8_t line_base;
1071     uint8_t line_range;
1072     uint8_t opcode_base;
1073   } params_;
1074 
1075   const File& file_;
1076 
1077   CompilationUnitSizes sizes_;
1078   std::vector<string_view> include_directories_;
1079   std::vector<FileName> filenames_;
1080   std::vector<uint8_t> standard_opcode_lengths_;
1081   std::vector<std::string> expanded_filenames_;
1082 
1083   string_view remaining_;
1084 
1085   // Whether we are in a "shadow" part of the bytecode program.  Sometimes
1086   // parts of the line info program make it into the final binary even though
1087   // the corresponding code was stripped.  We can tell when this happened by
1088   // looking for DW_LNE_set_address ops where the operand is 0.  This
1089   // indicates that a relocation for that argument never got applied, which
1090   // probably means that the code got stripped.
1091   //
1092   // While this is true, we don't yield any LineInfo entries, because the
1093   // "address" value is garbage.
1094   bool shadow_;
1095 
1096   LineInfo info_;
1097 
DoAdvance(uint64_t advance,uint8_t max_per_instr)1098   void DoAdvance(uint64_t advance, uint8_t max_per_instr) {
1099     info_.address += params_.minimum_instruction_length *
1100                      ((info_.op_index + advance) / max_per_instr);
1101     info_.op_index = (info_.op_index + advance) % max_per_instr;
1102   }
1103 
Advance(uint64_t amount)1104   void Advance(uint64_t amount) {
1105     if (params_.maximum_operations_per_instruction == 1) {
1106       // This is by far the common case (only false on VLIW architectuers),
1107       // and this inlining/specialization avoids a costly division.
1108       DoAdvance(amount, 1);
1109     } else {
1110       DoAdvance(amount, params_.maximum_operations_per_instruction);
1111     }
1112   }
1113 
AdjustedOpcode(uint8_t op)1114   uint8_t AdjustedOpcode(uint8_t op) { return op - params_.opcode_base; }
1115 
SpecialOpcodeAdvance(uint8_t op)1116   void SpecialOpcodeAdvance(uint8_t op) {
1117     Advance(AdjustedOpcode(op) / params_.line_range);
1118   }
1119 };
1120 
SeekToOffset(uint64_t offset,uint8_t address_size)1121 void LineInfoReader::SeekToOffset(uint64_t offset, uint8_t address_size) {
1122   string_view data = file_.debug_line;
1123   SkipBytes(offset, &data);
1124 
1125   sizes_.SetAddressSize(address_size);
1126   data = sizes_.ReadInitialLength(&data);
1127   sizes_.ReadDWARFVersion(&data);
1128   uint64_t header_length = sizes_.ReadDWARFOffset(&data);
1129   string_view program = data;
1130   SkipBytes(header_length, &program);
1131 
1132   params_.minimum_instruction_length = ReadMemcpy<uint8_t>(&data);
1133   if (sizes_.dwarf_version() == 4) {
1134     params_.maximum_operations_per_instruction = ReadMemcpy<uint8_t>(&data);
1135 
1136     if (params_.maximum_operations_per_instruction == 0) {
1137       THROW("DWARF line info had maximum_operations_per_instruction=0");
1138     }
1139   } else {
1140     params_.maximum_operations_per_instruction = 1;
1141   }
1142   params_.default_is_stmt = ReadMemcpy<uint8_t>(&data);
1143   params_.line_base = ReadMemcpy<int8_t>(&data);
1144   params_.line_range = ReadMemcpy<uint8_t>(&data);
1145   params_.opcode_base = ReadMemcpy<uint8_t>(&data);
1146   if (params_.line_range == 0) {
1147     THROW("line_range of zero will cause divide by zero");
1148   }
1149 
1150   standard_opcode_lengths_.resize(params_.opcode_base);
1151   for (size_t i = 1; i < params_.opcode_base; i++) {
1152     standard_opcode_lengths_[i] = ReadMemcpy<uint8_t>(&data);
1153   }
1154 
1155   // Read include_directories.
1156   include_directories_.clear();
1157 
1158   // Implicit current directory entry.
1159   include_directories_.push_back(string_view());
1160 
1161   while (true) {
1162     string_view dir = ReadNullTerminated(&data);
1163     if (dir.empty()) {
1164       break;
1165     }
1166     include_directories_.push_back(dir);
1167   }
1168 
1169   // Read file_names.
1170   filenames_.clear();
1171   expanded_filenames_.clear();
1172 
1173   // Filename 0 is unused.
1174   filenames_.push_back(FileName());
1175   while (true) {
1176     FileName file_name;
1177     file_name.name = ReadNullTerminated(&data);
1178     if (file_name.name.empty()) {
1179       break;
1180     }
1181     file_name.directory_index = ReadLEB128<uint32_t>(&data);
1182     file_name.modified_time = ReadLEB128<uint64_t>(&data);
1183     file_name.file_size = ReadLEB128<uint64_t>(&data);
1184     if (file_name.directory_index >= include_directories_.size()) {
1185       THROW("directory index out of range");
1186     }
1187     filenames_.push_back(file_name);
1188   }
1189 
1190   info_ = LineInfo(params_.default_is_stmt);
1191   remaining_ = program;
1192   shadow_ = false;
1193 }
1194 
ReadLineInfo()1195 bool LineInfoReader::ReadLineInfo() {
1196   // Final step of last DW_LNS_copy / special opcode.
1197   info_.discriminator = 0;
1198   info_.basic_block = false;
1199   info_.prologue_end = false;
1200   info_.epilogue_begin = false;
1201 
1202   // Final step of DW_LNE_end_sequence.
1203   info_.end_sequence = false;
1204 
1205   string_view data = remaining_;
1206 
1207   while (true) {
1208     if (data.empty()) {
1209       remaining_ = data;
1210       return false;
1211     }
1212 
1213     uint8_t op = ReadMemcpy<uint8_t>(&data);
1214 
1215     if (op >= params_.opcode_base) {
1216       SpecialOpcodeAdvance(op);
1217       info_.line +=
1218           params_.line_base + (AdjustedOpcode(op) % params_.line_range);
1219       if (!shadow_) {
1220         remaining_ = data;
1221         return true;
1222       }
1223     } else {
1224       switch (op) {
1225         case DW_LNS_extended_op: {
1226           uint16_t len = ReadLEB128<uint16_t>(&data);
1227           uint8_t extended_op = ReadMemcpy<uint8_t>(&data);
1228           switch (extended_op) {
1229             case DW_LNE_end_sequence: {
1230               // Preserve address and set end_sequence, but reset everything
1231               // else.
1232               uint64_t addr = info_.address;
1233               info_ = LineInfo(params_.default_is_stmt);
1234               info_.address = addr;
1235               info_.end_sequence = true;
1236               if (!shadow_) {
1237                 remaining_ = data;
1238                 return true;
1239               }
1240               break;
1241             }
1242             case DW_LNE_set_address:
1243               info_.address = sizes_.ReadAddress(&data);
1244               info_.op_index = 0;
1245               shadow_ = (info_.address == 0);
1246               break;
1247             case DW_LNE_define_file: {
1248               FileName file_name;
1249               file_name.name = ReadNullTerminated(&data);
1250               file_name.directory_index = ReadLEB128<uint32_t>(&data);
1251               file_name.modified_time = ReadLEB128<uint64_t>(&data);
1252               file_name.file_size = ReadLEB128<uint64_t>(&data);
1253               if (file_name.directory_index >= include_directories_.size()) {
1254                 THROW("directory index out of range");
1255               }
1256               filenames_.push_back(file_name);
1257               break;
1258             }
1259             case DW_LNE_set_discriminator:
1260               info_.discriminator = ReadLEB128<uint32_t>(&data);
1261               break;
1262             default:
1263               // We don't understand this opcode, skip it.
1264               SkipBytes(len, &data);
1265               if (verbose_level > 0) {
1266                 fprintf(stderr,
1267                         "bloaty: warning: unknown DWARF line table extended "
1268                         "opcode: %d\n",
1269                         extended_op);
1270               }
1271               break;
1272           }
1273           break;
1274         }
1275         case DW_LNS_copy:
1276           if (!shadow_) {
1277             remaining_ = data;
1278             return true;
1279           }
1280           break;
1281         case DW_LNS_advance_pc:
1282           Advance(ReadLEB128<uint64_t>(&data));
1283           break;
1284         case DW_LNS_advance_line:
1285           info_.line += ReadLEB128<int32_t>(&data);
1286           break;
1287         case DW_LNS_set_file:
1288           info_.file = ReadLEB128<uint32_t>(&data);
1289           if (info_.file >= filenames_.size()) {
1290             THROW("filename index too big");
1291           }
1292           break;
1293         case DW_LNS_set_column:
1294           info_.column = ReadLEB128<uint32_t>(&data);
1295           break;
1296         case DW_LNS_negate_stmt:
1297           info_.is_stmt = !info_.is_stmt;
1298           break;
1299         case DW_LNS_set_basic_block:
1300           info_.basic_block = true;
1301           break;
1302         case DW_LNS_const_add_pc:
1303           SpecialOpcodeAdvance(255);
1304           break;
1305         case DW_LNS_fixed_advance_pc:
1306           info_.address += ReadMemcpy<uint16_t>(&data);
1307           info_.op_index = 0;
1308           break;
1309         case DW_LNS_set_prologue_end:
1310           info_.prologue_end = true;
1311           break;
1312         case DW_LNS_set_epilogue_begin:
1313           info_.epilogue_begin = true;
1314           break;
1315         case DW_LNS_set_isa:
1316           info_.isa = ReadLEB128<uint8_t>(&data);
1317           break;
1318         default:
1319           // Unknown opcode, but we know its length so can skip it.
1320           SkipBytes(standard_opcode_lengths_[op], &data);
1321           if (verbose_level > 0) {
1322             fprintf(stderr,
1323                     "bloaty: warning: unknown DWARF line table opcode: %d\n",
1324                     op);
1325           }
1326           break;
1327       }
1328     }
1329   }
1330 }
1331 
1332 }  // namespace dwarf
1333 
1334 // Bloaty DWARF Data Sources ///////////////////////////////////////////////////
1335 
1336 // The DWARF .debug_aranges section should, in theory, give us exactly the
1337 // information we need to map file ranges in linked binaries to compilation
1338 // units from where that code came.  However, .debug_aranges is often incomplete
1339 // or missing completely, so we use it as just one of several data sources for
1340 // the "compileunits" data source.
ReadDWARFAddressRanges(const dwarf::File & file,RangeSink * sink)1341 static bool ReadDWARFAddressRanges(const dwarf::File& file, RangeSink* sink) {
1342   // Maps compilation unit offset -> source filename
1343   // Lazily initialized.
1344   class FilenameMap {
1345    public:
1346     FilenameMap(const dwarf::File& file)
1347         : die_reader_(file),
1348           missing_("[DWARF is missing filename]") {
1349       attr_reader_.OnAttribute(
1350           DW_AT_name, [](string_view* s, dwarf::AttrValue data) {
1351             if (!data.IsString()) return;
1352             *s = data.GetString();
1353           });
1354     }
1355 
1356     std::string GetFilename(uint64_t compilation_unit_offset) {
1357       auto& name = map_[compilation_unit_offset];
1358       if (name.empty()) {
1359         name = LookupFilename(compilation_unit_offset);
1360       }
1361       return name;
1362     }
1363 
1364    private:
1365     std::string LookupFilename(uint64_t compilation_unit_offset) {
1366       auto section = dwarf::DIEReader::Section::kDebugInfo;
1367       string_view name;
1368       if (die_reader_.SeekToCompilationUnit(section, compilation_unit_offset) &&
1369           die_reader_.GetTag() == DW_TAG_compile_unit &&
1370           (attr_reader_.ReadAttributes(&die_reader_, &name),
1371            !name.empty())) {
1372         return std::string(name);
1373       } else {
1374         return missing_;
1375       }
1376     }
1377 
1378     dwarf::DIEReader die_reader_;
1379     dwarf::AttrReader<string_view> attr_reader_;
1380     std::unordered_map<uint64_t, std::string> map_;
1381     std::string missing_;
1382   } map(file);
1383 
1384   dwarf::AddressRanges ranges(file.debug_aranges);
1385 
1386   while (ranges.NextUnit()) {
1387     std::string filename = map.GetFilename(ranges.debug_info_offset());
1388 
1389     while (ranges.NextRange()) {
1390       if (ranges.address() != 0) {
1391         sink->AddVMRangeIgnoreDuplicate("dwarf_aranges", ranges.address(),
1392                                         ranges.length(), filename);
1393       }
1394     }
1395   }
1396 
1397   return true;
1398 }
1399 
1400 // TODO(haberman): make these into real protobufs once proto supports
1401 // string_view.
1402 class GeneralDIE {
1403  public:
has_name() const1404   bool has_name() const { return has_name_; }
has_linkage_name() const1405   bool has_linkage_name() const { return has_linkage_name_; }
has_location_string() const1406   bool has_location_string() const { return has_location_string_; }
has_low_pc() const1407   bool has_low_pc() const { return has_low_pc_; }
has_high_pc() const1408   bool has_high_pc() const { return has_high_pc_; }
has_location_uint64() const1409   bool has_location_uint64() const { return has_location_uint64_; }
has_stmt_list() const1410   bool has_stmt_list() const { return has_stmt_list_; }
has_ranges() const1411   bool has_ranges() const { return has_ranges_; }
has_start_scope() const1412   bool has_start_scope() const { return has_start_scope_; }
1413 
DebugString()1414   std::string DebugString() {
1415     std::string ret;
1416     if (has_name()) {
1417       ret += absl::Substitute("name: $0\n", name());
1418     }
1419     if (has_linkage_name()) {
1420       ret += absl::Substitute("linkage_name: $0\n", linkage_name());
1421     }
1422     if (has_location_string()) {
1423       ret += absl::Substitute("location_string: $0\n", location_string());
1424     }
1425     if (has_low_pc()) {
1426       ret += absl::Substitute("low_pc: $0\n", low_pc());
1427     }
1428     if (has_high_pc()) {
1429       ret += absl::Substitute("high_pc: $0\n", high_pc());
1430     }
1431     if (has_location_uint64()) {
1432       ret += absl::Substitute("location_uint64: $0\n", location_uint64());
1433     }
1434     if (has_stmt_list()) {
1435       ret += absl::Substitute("stmt_list: $0\n", stmt_list());
1436     }
1437     if (has_ranges()) {
1438       ret += absl::Substitute("ranges: $0\n", ranges());
1439     }
1440     if (has_start_scope()) {
1441       ret += absl::Substitute("start_scope: $0\n", start_scope());
1442     }
1443     return ret;
1444   }
1445 
name() const1446   string_view name() const { return name_; }
linkage_name() const1447   string_view linkage_name() const { return linkage_name_; }
location_string() const1448   string_view location_string() const { return location_string_; }
low_pc() const1449   uint64_t low_pc() const { return low_pc_; }
high_pc() const1450   uint64_t high_pc() const { return high_pc_; }
location_uint64() const1451   uint64_t location_uint64() const { return location_uint64_; }
stmt_list() const1452   uint64_t stmt_list() const { return stmt_list_; }
ranges() const1453   uint64_t ranges() const { return ranges_; }
start_scope() const1454   uint64_t start_scope() const { return start_scope_; }
1455 
set_name(string_view val)1456   void set_name(string_view val) {
1457     has_name_ = true;
1458     name_ = val;
1459   }
set_linkage_name(string_view val)1460   void set_linkage_name(string_view val) {
1461     has_linkage_name_ = true;
1462     location_string_ = val;
1463   }
set_location_string(string_view val)1464   void set_location_string(string_view val) {
1465     has_location_string_ = true;
1466     location_string_ = val;
1467   }
set_low_pc(uint64_t val)1468   void set_low_pc(uint64_t val) {
1469     has_low_pc_ = true;
1470     low_pc_ = val;
1471   }
set_high_pc(uint64_t val)1472   void set_high_pc(uint64_t val) {
1473     has_high_pc_ = true;
1474     high_pc_ = val;
1475   }
set_location_uint64(uint64_t val)1476   void set_location_uint64(uint64_t val) {
1477     has_location_uint64_ = true;
1478     location_uint64_ = val;
1479   }
set_stmt_list(uint64_t val)1480   void set_stmt_list(uint64_t val) {
1481     has_stmt_list_ = true;
1482     stmt_list_ = val;
1483   }
set_ranges(uint64_t val)1484   void set_ranges(uint64_t val) {
1485     has_ranges_ = true;
1486     ranges_ = val;
1487   }
set_start_scope(uint64_t val)1488   void set_start_scope(uint64_t val) {
1489     has_start_scope_ = true;
1490     start_scope_ = val;
1491   }
1492 
1493  private:
1494   bool has_name_ = false;
1495   bool has_linkage_name_ = false;
1496   bool has_location_string_ = false;
1497   bool has_low_pc_ = false;
1498   bool has_high_pc_ = false;
1499   bool has_location_uint64_ = false;
1500   bool has_stmt_list_ = false;
1501   bool has_ranges_ = false;
1502   bool has_start_scope_ = false;
1503 
1504   string_view name_;
1505   string_view linkage_name_;
1506   string_view location_string_;
1507   uint64_t low_pc_ = 0;
1508   uint64_t high_pc_ = 0;
1509   uint64_t location_uint64_ = 0;
1510   uint64_t stmt_list_ = 0;
1511   uint64_t ranges_ = 0;
1512   uint64_t start_scope_ = 0;
1513 };
1514 
// Record holding a single optional attribute value, stmt_list, together with
// a flag that says whether it has been set.
class InlinesDIE {
 public:
  // Stores `val` and marks stmt_list as present.
  void set_stmt_list(uint64_t val) {
    stmt_list_ = val;
    has_stmt_list_ = true;
  }

  // The stored value; 0 until set_stmt_list() has been called.
  uint64_t stmt_list() const { return stmt_list_; }

  // True once set_stmt_list() has been called.
  bool has_stmt_list() const { return has_stmt_list_; }

 private:
  uint64_t stmt_list_ = 0;
  bool has_stmt_list_ = false;
};
1530 
AddDIE(const dwarf::File & file,const std::string & name,const GeneralDIE & die,const SymbolTable & symtab,const DualMap & symbol_map,const dwarf::CompilationUnitSizes & sizes,RangeSink * sink)1531 void AddDIE(const dwarf::File& file, const std::string& name,
1532             const GeneralDIE& die, const SymbolTable& symtab,
1533             const DualMap& symbol_map, const dwarf::CompilationUnitSizes& sizes,
1534             RangeSink* sink) {
1535   // Some DIEs mark address ranges with high_pc/low_pc pairs (especially
1536   // functions).
1537   if (die.has_low_pc() && die.has_high_pc() && die.low_pc() != 0) {
1538     uint64_t high_pc = die.high_pc();
1539 
1540     // It appears that some compilers make high_pc a size, and others make it an
1541     // address.
1542     if (high_pc >= die.low_pc()) {
1543       high_pc -= die.low_pc();
1544     }
1545     sink->AddVMRangeIgnoreDuplicate("dwarf_pcpair", die.low_pc(), high_pc,
1546                                     name);
1547   }
1548 
1549   // Sometimes a DIE has a linkage_name, which we can look up in the symbol
1550   // table.
1551   if (die.has_linkage_name()) {
1552     auto it = symtab.find(die.linkage_name());
1553     if (it != symtab.end()) {
1554       sink->AddVMRangeIgnoreDuplicate("dwarf_linkagename", it->second.first,
1555                                       it->second.second, name);
1556     }
1557   }
1558 
1559   // Sometimes the DIE has a "location", which gives the location as an address.
1560   // This parses a very small subset of the overall DWARF expression grammar.
1561   if (die.has_location_string()) {
1562     string_view location = die.location_string();
1563     if (location.size() == sizes.address_size() + 1 &&
1564         location[0] == DW_OP_addr) {
1565       location.remove_prefix(1);
1566       uint64_t addr;
1567       // TODO(haberman): endian?
1568       if (sizes.address_size() == 4) {
1569         addr = dwarf::ReadMemcpy<uint32_t>(&location);
1570       } else if (sizes.address_size() == 8) {
1571         addr = dwarf::ReadMemcpy<uint64_t>(&location);
1572       } else {
1573         BLOATY_UNREACHABLE();
1574       }
1575 
1576       // Unfortunately the location doesn't include a size, so we look that part
1577       // up in the symbol map.
1578       uint64_t size;
1579       if (symbol_map.vm_map.TryGetSize(addr, &size)) {
1580         sink->AddVMRangeIgnoreDuplicate("dwarf_location", addr, size, name);
1581       } else {
1582         if (verbose_level > 0) {
1583           fprintf(stderr,
1584                   "bloaty: warning: couldn't find DWARF location in symbol "
1585                   "table, address: %" PRIx64 "\n",
1586                   addr);
1587         }
1588       }
1589     }
1590   }
1591 
1592   // Sometimes a location is given as an offset into debug_loc.
1593   if (die.has_location_uint64()) {
1594     if (die.location_uint64() < file.debug_loc.size()) {
1595       absl::string_view loc_range = file.debug_loc.substr(die.location_uint64());
1596       loc_range = GetLocationListRange(sizes, loc_range);
1597       sink->AddFileRange("dwarf_locrange", name, loc_range);
1598     } else if (verbose_level > 0) {
1599       fprintf(stderr,
1600               "bloaty: warning: DWARF location out of range, location=%" PRIx64
1601               "\n",
1602               die.location_uint64());
1603     }
1604   }
1605 
1606   uint64_t ranges_offset = UINT64_MAX;
1607 
1608   // There are two different attributes that sometimes contain an offset into
1609   // debug_ranges.
1610   if (die.has_ranges()) {
1611     ranges_offset = die.ranges();
1612   } else if (die.has_start_scope()) {
1613     ranges_offset = die.start_scope();
1614   }
1615 
1616   if (ranges_offset != UINT64_MAX) {
1617     if (ranges_offset < file.debug_ranges.size()) {
1618       absl::string_view ranges_range = file.debug_ranges.substr(ranges_offset);
1619       ranges_range = GetRangeListRange(sizes, ranges_range);
1620       sink->AddFileRange("dwarf_debugrange", name, ranges_range);
1621     } else if (verbose_level > 0) {
1622       fprintf(stderr,
1623               "bloaty: warning: DWARF debug range out of range, "
1624               "ranges_offset=%" PRIx64 "\n",
1625               ranges_offset);
1626     }
1627   }
1628 }
1629 
ReadDWARFPubNames(const dwarf::File & file,string_view section,RangeSink * sink)1630 static void ReadDWARFPubNames(const dwarf::File& file, string_view section,
1631                               RangeSink* sink) {
1632   dwarf::DIEReader die_reader(file);
1633   dwarf::AttrReader<string_view> attr_reader;
1634   string_view remaining = section;
1635 
1636   attr_reader.OnAttribute(
1637       DW_AT_name, [](string_view* s, dwarf::AttrValue data) {
1638         if (data.type() == dwarf::AttrValue::Type::kString) {
1639           *s = data.GetString();
1640         }
1641       });
1642 
1643   while (remaining.size() > 0) {
1644     dwarf::CompilationUnitSizes sizes;
1645     string_view full_unit = remaining;
1646     string_view unit = sizes.ReadInitialLength(&remaining);
1647     full_unit =
1648         full_unit.substr(0, unit.size() + (unit.data() - full_unit.data()));
1649     sizes.ReadDWARFVersion(&unit);
1650     uint64_t debug_info_offset = sizes.ReadDWARFOffset(&unit);
1651     bool ok = die_reader.SeekToCompilationUnit(
1652         dwarf::DIEReader::Section::kDebugInfo, debug_info_offset);
1653     if (!ok) {
1654       THROW("Couldn't seek to debug_info section");
1655     }
1656     string_view compileunit_name;
1657     attr_reader.ReadAttributes(&die_reader, &compileunit_name);
1658     if (!compileunit_name.empty()) {
1659       sink->AddFileRange("dwarf_pubnames", compileunit_name, full_unit);
1660     }
1661   }
1662 }
1663 
ReadEncodedPointer(uint8_t encoding,bool is_64bit,string_view * data,const char * data_base,RangeSink * sink)1664 uint64_t ReadEncodedPointer(uint8_t encoding, bool is_64bit, string_view* data,
1665                             const char* data_base, RangeSink* sink) {
1666   uint64_t value;
1667   const char* ptr = data->data();
1668   uint8_t format = encoding & DW_EH_PE_FORMAT_MASK;
1669 
1670   switch (format) {
1671     case DW_EH_PE_omit:
1672       return 0;
1673     case DW_EH_PE_absptr:
1674       if (is_64bit) {
1675         value = dwarf::ReadMemcpy<uint64_t>(data);
1676       } else {
1677         value = dwarf::ReadMemcpy<uint32_t>(data);
1678       }
1679       break;
1680     case DW_EH_PE_uleb128:
1681       value = dwarf::ReadLEB128<uint64_t>(data);
1682       break;
1683     case DW_EH_PE_udata2:
1684       value = dwarf::ReadMemcpy<uint16_t>(data);
1685       break;
1686     case DW_EH_PE_udata4:
1687       value = dwarf::ReadMemcpy<uint32_t>(data);
1688       break;
1689     case DW_EH_PE_udata8:
1690       value = dwarf::ReadMemcpy<uint64_t>(data);
1691       break;
1692     case DW_EH_PE_sleb128:
1693       value = dwarf::ReadLEB128<int64_t>(data);
1694       break;
1695     case DW_EH_PE_sdata2:
1696       value = dwarf::ReadMemcpy<int16_t>(data);
1697       break;
1698     case DW_EH_PE_sdata4:
1699       value = dwarf::ReadMemcpy<int32_t>(data);
1700       break;
1701     case DW_EH_PE_sdata8:
1702       value = dwarf::ReadMemcpy<int64_t>(data);
1703       break;
1704     default:
1705       THROWF("Unexpected eh_frame format value: $0", format);
1706   }
1707 
1708   uint8_t application = encoding & DW_EH_PE_APPLICATION_MASK;
1709 
1710   switch (application) {
1711     case 0:
1712       break;
1713     case DW_EH_PE_pcrel:
1714       value += sink->TranslateFileToVM(ptr);
1715       break;
1716     case DW_EH_PE_datarel:
1717       if (data_base == nullptr) {
1718         THROW("datarel requested but no data_base provided");
1719       }
1720       value += sink->TranslateFileToVM(data_base);
1721       break;
1722     case DW_EH_PE_textrel:
1723     case DW_EH_PE_funcrel:
1724     case DW_EH_PE_aligned:
1725       THROWF("Unimplemented eh_frame application value: $0", application);
1726   }
1727 
1728   if (encoding & DW_EH_PE_indirect) {
1729     string_view location = sink->TranslateVMToFile(value);
1730     if (is_64bit) {
1731       value = dwarf::ReadMemcpy<uint64_t>(&location);
1732     } else {
1733       value = dwarf::ReadMemcpy<uint32_t>(&location);
1734     }
1735   }
1736 
1737   return value;
1738 }
1739 
1740 // Code to read the .eh_frame section.  This is not technically DWARF, but it
1741 // is similar to .debug_frame (which is DWARF) so it's convenient to put it
1742 // here.
1743 //
1744 // The best documentation I can find for this format comes from:
1745 //
1746 // *
1747 // http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
1748 // * https://www.airs.com/blog/archives/460
1749 //
1750 // However these are both under-specified.  Some details are not mentioned in
1751 // either of these (for example, the fact that the function length uses the FDE
1752 // encoding, but always absolute).  libdwarf's implementation contains a comment
1753 // saying "It is not clear if this is entirely correct".  Basically the only
1754 // thing you can trust for some of these details is the code that actually
1755 // implements unwinding in production:
1756 //
1757 // * libunwind http://www.nongnu.org/libunwind/
1758 //   https://github.com/pathscale/libunwind/blob/master/src/dwarf/Gfde.c
1759 // * LLVM libunwind (a different project!!)
1760 //   https://github.com/llvm-mirror/libunwind/blob/master/src/DwarfParser.hpp
1761 // * libgcc
1762 //   https://github.com/gcc-mirror/gcc/blob/master/libgcc/unwind-dw2-fde.c
ReadEhFrame(string_view data,RangeSink * sink)1763 void ReadEhFrame(string_view data, RangeSink* sink) {
1764   string_view remaining = data;
1765 
1766   struct CIEInfo {
1767     int version = 0;
1768     uint32_t code_align = 0;
1769     int32_t data_align = 0;
1770     uint8_t fde_encoding = 0;
1771     uint8_t lsda_encoding = 0;
1772     bool is_signal_handler = false;
1773     bool has_augmentation_length = false;
1774     uint64_t personality_function = 0;
1775     uint32_t return_address_reg = 0;
1776   };
1777 
1778   std::unordered_map<const void*, CIEInfo> cie_map;
1779 
1780   while (remaining.size() > 0) {
1781     dwarf::CompilationUnitSizes sizes;
1782     string_view full_entry = remaining;
1783     string_view entry = sizes.ReadInitialLength(&remaining);
1784     if (entry.size() == 0 && remaining.size() == 0) {
1785       return;
1786     }
1787     full_entry =
1788         full_entry.substr(0, entry.size() + (entry.data() - full_entry.data()));
1789     uint32_t id = dwarf::ReadMemcpy<uint32_t>(&entry);
1790     if (id == 0) {
1791       // CIE, we don't attribute this yet.
1792       CIEInfo& cie_info = cie_map[full_entry.data()];
1793       cie_info.version = dwarf::ReadMemcpy<uint8_t>(&entry);
1794       string_view aug_string = dwarf::ReadNullTerminated(&entry);
1795       cie_info.code_align = dwarf::ReadLEB128<uint32_t>(&entry);
1796       cie_info.data_align = dwarf::ReadLEB128<int32_t>(&entry);
1797       switch (cie_info.version) {
1798         case 1:
1799           cie_info.return_address_reg = dwarf::ReadMemcpy<uint8_t>(&entry);
1800           break;
1801         case 3:
1802           cie_info.return_address_reg = dwarf::ReadLEB128<uint32_t>(&entry);
1803           break;
1804         default:
1805           THROW("Unexpected eh_frame CIE version");
1806       }
1807       while (aug_string.size() > 0) {
1808         switch (aug_string[0]) {
1809           case 'z':
1810             // Length until the end of augmentation data.
1811             cie_info.has_augmentation_length = true;
1812             dwarf::ReadLEB128<uint32_t>(&entry);
1813             break;
1814           case 'L':
1815             cie_info.lsda_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
1816             break;
1817           case 'R':
1818             cie_info.fde_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
1819             break;
1820           case 'S':
1821             cie_info.is_signal_handler = true;
1822             break;
1823           case 'P': {
1824             uint8_t encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
1825             cie_info.personality_function =
1826                 ReadEncodedPointer(encoding, true, &entry, nullptr, sink);
1827             break;
1828           }
1829           default:
1830             THROW("Unexepcted augmentation character");
1831         }
1832         aug_string.remove_prefix(1);
1833       }
1834     } else {
1835       auto iter = cie_map.find(entry.data() - id - 4);
1836       if (iter == cie_map.end()) {
1837         THROW("Couldn't find CIE for FDE");
1838       }
1839       const CIEInfo& cie_info = iter->second;
1840       // TODO(haberman): don't hard-code 64-bit.
1841       uint64_t address = ReadEncodedPointer(cie_info.fde_encoding, true, &entry,
1842                                             nullptr, sink);
1843       // TODO(haberman); Technically the FDE addresses could span a
1844       // function/compilation unit?  They can certainly span inlines.
1845       /*
1846       uint64_t length =
1847         ReadEncodedPointer(cie_info.fde_encoding & 0xf, true, &entry, sink);
1848       (void)length;
1849 
1850       if (cie_info.has_augmentation_length) {
1851         uint32_t augmentation_length = dwarf::ReadLEB128<uint32_t>(&entry);
1852         (void)augmentation_length;
1853       }
1854 
1855       uint64_t lsda =
1856           ReadEncodedPointer(cie_info.lsda_encoding, true, &entry, sink);
1857       if (lsda) {
1858       }
1859       */
1860 
1861       sink->AddFileRangeForVMAddr("dwarf_fde", address, full_entry);
1862     }
1863   }
1864 }
1865 
1866 // See documentation here:
1867 //   http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html#EHFRAME
ReadEhFrameHdr(string_view data,RangeSink * sink)1868 void ReadEhFrameHdr(string_view data, RangeSink* sink) {
1869   const char* base = data.data();
1870   uint8_t version = dwarf::ReadMemcpy<uint8_t>(&data);
1871   uint8_t eh_frame_ptr_enc = dwarf::ReadMemcpy<uint8_t>(&data);
1872   uint8_t fde_count_enc = dwarf::ReadMemcpy<uint8_t>(&data);
1873   uint8_t table_enc = dwarf::ReadMemcpy<uint8_t>(&data);
1874 
1875   if (version != 1) {
1876     THROWF("Unknown eh_frame_hdr version: $0", version);
1877   }
1878 
1879   // TODO(haberman): don't hard-code 64-bit.
1880   uint64_t eh_frame_ptr =
1881       ReadEncodedPointer(eh_frame_ptr_enc, true, &data, base, sink);
1882   (void)eh_frame_ptr;
1883   uint64_t fde_count =
1884       ReadEncodedPointer(fde_count_enc, true, &data, base, sink);
1885 
1886   for (uint64_t i = 0; i < fde_count; i++) {
1887     string_view entry_data = data;
1888     uint64_t initial_location =
1889         ReadEncodedPointer(table_enc, true, &data, base, sink);
1890     uint64_t fde_addr = ReadEncodedPointer(table_enc, true, &data, base, sink);
1891     entry_data.remove_suffix(data.size());
1892     sink->AddFileRangeForVMAddr("dwarf_fde_table", initial_location,
1893                                 entry_data);
1894 
1895     // We could add fde_addr with an unknown length if we wanted to skip reading
1896     // eh_frame.  We can't count on this table being available though, so we
1897     // don't want to remove the eh_frame reading code altogether.
1898     (void)fde_addr;
1899   }
1900 }
1901 
ReadDWARFStmtListRange(const dwarf::File & file,uint64_t offset,string_view unit_name,RangeSink * sink)1902 static void ReadDWARFStmtListRange(const dwarf::File& file, uint64_t offset,
1903                                    string_view unit_name, RangeSink* sink) {
1904   string_view data = file.debug_line;
1905   dwarf::SkipBytes(offset, &data);
1906   string_view data_with_length = data;
1907   dwarf::CompilationUnitSizes sizes;
1908   data = sizes.ReadInitialLength(&data);
1909   data = data_with_length.substr(
1910       0, data.size() + (data.data() - data_with_length.data()));
1911   sink->AddFileRange("dwarf_stmtlistrange", unit_name, data);
1912 }
1913 
1914 // The DWARF debug info can help us get compileunits info.  DIEs for compilation
1915 // units, functions, and global variables often have attributes that will
1916 // resolve to addresses.
ReadDWARFDebugInfo(const dwarf::File & file,dwarf::DIEReader::Section section,const SymbolTable & symtab,const DualMap & symbol_map,RangeSink * sink,std::unordered_map<uint64_t,std::string> * stmt_list_map)1917 static void ReadDWARFDebugInfo(
1918     const dwarf::File& file, dwarf::DIEReader::Section section,
1919     const SymbolTable& symtab, const DualMap& symbol_map, RangeSink* sink,
1920     std::unordered_map<uint64_t, std::string>* stmt_list_map) {
1921   dwarf::DIEReader die_reader(file);
1922   die_reader.set_strp_sink(sink);
1923   dwarf::AttrReader<GeneralDIE> attr_reader;
1924 
1925   attr_reader.OnAttribute(DW_AT_name,
1926                           [](GeneralDIE* die, dwarf::AttrValue val) {
1927                             if (!val.IsString()) return;
1928                             die->set_name(val.GetString());
1929                           });
1930   attr_reader.OnAttribute(DW_AT_linkage_name,
1931                           [](GeneralDIE* die, dwarf::AttrValue val) {
1932                             if (!val.IsString()) return;
1933                             die->set_linkage_name(val.GetString());
1934                           });
1935   attr_reader.OnAttribute(DW_AT_location,
1936                           [](GeneralDIE* die, dwarf::AttrValue val) {
1937                             if (val.IsString()) {
1938                               die->set_location_string(val.GetString());
1939                             } else {
1940                               die->set_location_uint64(val.GetUint());
1941                             }
1942                           });
1943   attr_reader.OnAttribute(DW_AT_low_pc,
1944                           [](GeneralDIE* die, dwarf::AttrValue val) {
1945                             absl::optional<uint64_t> uint = val.ToUint();
1946                             if (!uint.has_value()) return;
1947                             die->set_low_pc(uint.value());
1948                           });
1949   attr_reader.OnAttribute(DW_AT_high_pc,
1950                           [](GeneralDIE* die, dwarf::AttrValue val) {
1951                             absl::optional<uint64_t> uint = val.ToUint();
1952                             if (!uint.has_value()) return;
1953                             die->set_high_pc(uint.value());
1954                           });
1955   attr_reader.OnAttribute(DW_AT_stmt_list,
1956                           [](GeneralDIE* die, dwarf::AttrValue val) {
1957                             absl::optional<uint64_t> uint = val.ToUint();
1958                             if (!uint.has_value()) return;
1959                             die->set_stmt_list(uint.value());
1960                           });
1961   attr_reader.OnAttribute(DW_AT_ranges,
1962                           [](GeneralDIE* die, dwarf::AttrValue val) {
1963                             absl::optional<uint64_t> uint = val.ToUint();
1964                             if (!uint.has_value()) return;
1965                             die->set_ranges(uint.value());
1966                           });
1967   attr_reader.OnAttribute(DW_AT_start_scope,
1968                           [](GeneralDIE* die, dwarf::AttrValue val) {
1969                             absl::optional<uint64_t> uint = val.ToUint();
1970                             if (!uint.has_value()) return;
1971                             die->set_start_scope(uint.value());
1972                           });
1973 
1974   if (!die_reader.SeekToStart(section)) {
1975     return;
1976   }
1977 
1978   do {
1979     GeneralDIE compileunit_die;
1980     attr_reader.ReadAttributes(&die_reader, &compileunit_die);
1981     std::string compileunit_name = std::string(compileunit_die.name());
1982 
1983     if (compileunit_die.has_stmt_list()) {
1984       uint64_t stmt_list = compileunit_die.stmt_list();
1985       if (compileunit_name.empty()) {
1986         auto iter = stmt_list_map->find(stmt_list);
1987         if (iter != stmt_list_map->end()) {
1988           compileunit_name = iter->second;
1989         }
1990       } else {
1991         (*stmt_list_map)[stmt_list] = compileunit_name;
1992       }
1993     }
1994 
1995     if (compileunit_name.empty()) {
1996       continue;
1997     }
1998 
1999     die_reader.set_compileunit_name(compileunit_name);
2000     sink->AddFileRange("dwarf_debuginfo", compileunit_name,
2001                        die_reader.unit_range());
2002     AddDIE(file, compileunit_name, compileunit_die, symtab, symbol_map,
2003            die_reader.unit_sizes(), sink);
2004 
2005     if (compileunit_die.has_stmt_list()) {
2006       uint64_t offset = compileunit_die.stmt_list();
2007       ReadDWARFStmtListRange(file, offset, compileunit_name, sink);
2008     }
2009 
2010     string_view abbrev_data = file.debug_abbrev;
2011     dwarf::SkipBytes(die_reader.debug_abbrev_offset(), &abbrev_data);
2012     dwarf::AbbrevTable unit_abbrev;
2013     abbrev_data = unit_abbrev.ReadAbbrevs(abbrev_data);
2014     sink->AddFileRange("dwarf_abbrev", compileunit_name, abbrev_data);
2015 
2016     while (die_reader.NextDIE()) {
2017       GeneralDIE die;
2018       attr_reader.ReadAttributes(&die_reader, &die);
2019 
2020       // low_pc == 0 is a signal that this routine was stripped out of the
2021       // final binary.  Skip this DIE and all of its children.
2022       if (die.has_low_pc() && die.low_pc() == 0) {
2023         die_reader.SkipChildren();
2024       } else {
2025         AddDIE(file, compileunit_name, die, symtab, symbol_map,
2026                die_reader.unit_sizes(), sink);
2027       }
2028     }
2029   } while (die_reader.NextCompilationUnit());
2030 }
2031 
ReadDWARFCompileUnits(const dwarf::File & file,const SymbolTable & symtab,const DualMap & symbol_map,RangeSink * sink)2032 void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
2033                            const DualMap& symbol_map, RangeSink* sink) {
2034   if (!file.debug_info.size()) {
2035     THROW("missing debug info");
2036   }
2037 
2038   if (file.debug_aranges.size()) {
2039     ReadDWARFAddressRanges(file, sink);
2040   }
2041 
2042   std::unordered_map<uint64_t, std::string> stmt_list_map;
2043   ReadDWARFDebugInfo(file, dwarf::DIEReader::Section::kDebugInfo, symtab,
2044                      symbol_map, sink, &stmt_list_map);
2045   ReadDWARFDebugInfo(file, dwarf::DIEReader::Section::kDebugTypes, symtab,
2046                      symbol_map, sink, &stmt_list_map);
2047   ReadDWARFPubNames(file, file.debug_pubnames, sink);
2048   ReadDWARFPubNames(file, file.debug_pubtypes, sink);
2049 }
2050 
// Builds the label used for a line-table row: "<file>:<line>" when
// `include_line` is set, otherwise just `file`.
static std::string LineInfoKey(const std::string& file, uint32_t line,
                               bool include_line) {
  if (!include_line) {
    return file;
  }

  std::string key = file;
  key += ":";
  key += std::to_string(line);
  return key;
}
2059 
ReadDWARFStmtList(bool include_line,dwarf::LineInfoReader * line_info_reader,RangeSink * sink)2060 static void ReadDWARFStmtList(bool include_line,
2061                               dwarf::LineInfoReader* line_info_reader,
2062                               RangeSink* sink) {
2063   uint64_t span_startaddr = 0;
2064   std::string last_source;
2065 
2066   while (line_info_reader->ReadLineInfo()) {
2067     const auto& line_info = line_info_reader->lineinfo();
2068     auto addr = line_info.address;
2069     auto number = line_info.line;
2070     auto name =
2071         line_info.end_sequence
2072             ? last_source
2073             : LineInfoKey(line_info_reader->GetExpandedFilename(line_info.file),
2074                           number, include_line);
2075     if (!span_startaddr) {
2076       span_startaddr = addr;
2077     } else if (line_info.end_sequence ||
2078                (!last_source.empty() && name != last_source)) {
2079       sink->AddVMRange("dwarf_stmtlist", span_startaddr, addr - span_startaddr,
2080                        last_source);
2081       if (line_info.end_sequence) {
2082         span_startaddr = 0;
2083       } else {
2084         span_startaddr = addr;
2085       }
2086     }
2087     last_source = name;
2088   }
2089 }
2090 
ReadDWARFInlines(const dwarf::File & file,RangeSink * sink,bool include_line)2091 void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
2092                       bool include_line) {
2093   if (!file.debug_info.size() || !file.debug_line.size()) {
2094     THROW("no debug info");
2095   }
2096 
2097   dwarf::DIEReader die_reader(file);
2098   dwarf::LineInfoReader line_info_reader(file);
2099   dwarf::AttrReader<InlinesDIE> attr_reader;
2100 
2101   attr_reader.OnAttribute(
2102       DW_AT_stmt_list, [](InlinesDIE* die, dwarf::AttrValue data) {
2103         absl::optional<uint64_t> uint = data.ToUint();
2104         if (!uint.has_value()) return;
2105         die->set_stmt_list(uint.value());
2106       });
2107 
2108   if (!die_reader.SeekToStart(dwarf::DIEReader::Section::kDebugInfo)) {
2109     THROW("debug info is present, but empty");
2110   }
2111 
2112   while (true) {
2113     InlinesDIE die;
2114     attr_reader.ReadAttributes(&die_reader, &die);
2115 
2116     if (die.has_stmt_list()) {
2117       uint64_t offset = die.stmt_list();
2118       line_info_reader.SeekToOffset(offset,
2119                                     die_reader.unit_sizes().address_size());
2120       ReadDWARFStmtList(include_line, &line_info_reader, sink);
2121     }
2122 
2123     if (!die_reader.NextCompilationUnit()) {
2124       return;
2125     }
2126   }
2127 }
2128 
2129 }  // namespace bloaty
2130