1 // Copyright 2016 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <assert.h>
16 #include <stdio.h>
17
18 #include <algorithm>
19 #include <initializer_list>
20 #include <iostream>
21 #include <memory>
22 #include <stack>
23 #include <unordered_map>
24 #include <unordered_set>
25 #include <vector>
26
27 #include "absl/base/attributes.h"
28 #include "absl/base/macros.h"
29 #include "absl/strings/string_view.h"
30 #include "absl/strings/substitute.h"
31 #include "absl/types/optional.h"
32 #include "bloaty.h"
33 #include "bloaty.pb.h"
34 #include "dwarf_constants.h"
35 #include "re2/re2.h"
36
37 using namespace dwarf2reader;
38 using absl::string_view;
39
// Rounds |offset| up to the next multiple of |granularity|.
// |granularity| must be a power of two; the masking below relies on it.
static size_t AlignUpTo(size_t offset, size_t granularity) {
  const size_t low_bits = granularity - 1;
  return (offset + low_bits) & ~low_bits;
}
44
// Throws a bloaty::Error built from |str|, this file's name (__FILE__) and
// |line|.  Never returns.  The THROW()/THROWF() macros call this with
// __LINE__ so the error records the line of the failing check.
ABSL_ATTRIBUTE_NORETURN
static void Throw(const char *str, int line) {
  throw bloaty::Error(str, __FILE__, line);
}
49
// Convenience wrappers around Throw() that pass the call site's __LINE__.
//   THROW(msg)   -- |msg| is forwarded to Throw() unchanged.
//   THROWF(...)  -- the arguments are passed to absl::Substitute() and the
//                   resulting string is thrown.
#define THROW(msg) Throw(msg, __LINE__)
#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
52
53 namespace bloaty {
54
55 extern int verbose_level;
56
57 namespace dwarf {
58
// Integer division of |n| by |d| with the quotient rounded up: |d| - 1 is
// added to |n| before the truncating division.
int DivRoundUp(int n, int d) {
  const int bias = d - 1;
  return (n + bias) / d;
}
62
63
64 // Low-level Parsing Routines //////////////////////////////////////////////////
65
66 // For parsing the low-level values found in DWARF files. These are the only
67 // routines that touch the bytes of the input buffer directly. Everything else
68 // is layered on top of these.
69
70 template <class T>
ReadMemcpy(string_view * data)71 T ReadMemcpy(string_view* data) {
72 T ret;
73 if (data->size() < sizeof(T)) {
74 THROW("premature EOF reading fixed-length DWARF data");
75 }
76 memcpy(&ret, data->data(), sizeof(T));
77 data->remove_prefix(sizeof(T));
78 return ret;
79 }
80
ReadPiece(size_t bytes,string_view * data)81 string_view ReadPiece(size_t bytes, string_view* data) {
82 if(data->size() < bytes) {
83 THROW("premature EOF reading variable-length DWARF data");
84 }
85 string_view ret = data->substr(0, bytes);
86 data->remove_prefix(bytes);
87 return ret;
88 }
89
SkipBytes(size_t bytes,string_view * data)90 void SkipBytes(size_t bytes, string_view* data) {
91 if (data->size() < bytes) {
92 THROW("premature EOF skipping DWARF data");
93 }
94 data->remove_prefix(bytes);
95 }
96
ReadNullTerminated(string_view * data)97 string_view ReadNullTerminated(string_view* data) {
98 const char* nullz =
99 static_cast<const char*>(memchr(data->data(), '\0', data->size()));
100
101 // Return false if not NULL-terminated.
102 if (nullz == NULL) {
103 THROW("DWARF string was not NULL-terminated");
104 }
105
106 size_t len = nullz - data->data();
107 string_view val = data->substr(0, len);
108 data->remove_prefix(len + 1); // Remove NULL also.
109 return val;
110 }
111
SkipNullTerminated(string_view * data)112 void SkipNullTerminated(string_view* data) {
113 const char* nullz =
114 static_cast<const char*>(memchr(data->data(), '\0', data->size()));
115
116 // Return false if not NULL-terminated.
117 if (nullz == NULL) {
118 THROW("DWARF string was not NULL-terminated");
119 }
120
121 size_t len = nullz - data->data();
122 data->remove_prefix(len + 1); // Remove NULL also.
123 }
124
125 // Parses the LEB128 format defined by DWARF (both signed and unsigned
126 // versions).
127
ReadLEB128Internal(bool is_signed,string_view * data)128 uint64_t ReadLEB128Internal(bool is_signed, string_view* data) {
129 uint64_t ret = 0;
130 int shift = 0;
131 int maxshift = 70;
132 const char* ptr = data->data();
133 const char* limit = ptr + data->size();
134
135 while (ptr < limit && shift < maxshift) {
136 char byte = *(ptr++);
137 ret |= static_cast<uint64_t>(byte & 0x7f) << shift;
138 shift += 7;
139 if ((byte & 0x80) == 0) {
140 data->remove_prefix(ptr - data->data());
141 if (is_signed && shift < 64 && (byte & 0x40)) {
142 ret |= -(1ULL << shift);
143 }
144 return ret;
145 }
146 }
147
148 THROW("corrupt DWARF data, unterminated LEB128");
149 }
150
151 template <typename T>
ReadLEB128(string_view * data)152 T ReadLEB128(string_view* data) {
153 typedef typename std::conditional<std::is_signed<T>::value, int64_t,
154 uint64_t>::type Int64Type;
155 Int64Type val = ReadLEB128Internal(std::is_signed<T>::value, data);
156 if (val > std::numeric_limits<T>::max() ||
157 val < std::numeric_limits<T>::min()) {
158 THROW("DWARF data contained larger LEB128 than we were expecting");
159 }
160 return static_cast<T>(val);
161 }
162
SkipLEB128(string_view * data)163 void SkipLEB128(string_view* data) {
164 size_t limit =
165 std::min(static_cast<size_t>(data->size()), static_cast<size_t>(10));
166 for (size_t i = 0; i < limit; i++) {
167 if (((*data)[i] & 0x80) == 0) {
168 data->remove_prefix(i + 1);
169 return;
170 }
171 }
172
173 THROW("corrupt DWARF data, unterminated LEB128");
174 }
175
176 // Some size information attached to each compilation unit. The size of an
177 // address or offset in the DWARF data depends on this state which is parsed
178 // from the header.
179 class CompilationUnitSizes {
180 public:
181 // When true, DWARF offsets are 64 bits, otherwise they are 32 bit.
dwarf64() const182 bool dwarf64() const { return dwarf64_; }
183
184 // The size of addresses. Guaranteed to be either 4 or 8.
address_size() const185 uint8_t address_size() const { return address_size_; }
186
187 // DWARF version of this unit.
dwarf_version() const188 uint8_t dwarf_version() const { return dwarf_version_; }
189
SetAddressSize(uint8_t address_size)190 void SetAddressSize(uint8_t address_size) {
191 if (address_size != 4 && address_size != 8) {
192 THROWF("Unexpected address size: $0", address_size);
193 }
194 address_size_ = address_size;
195 }
196
197 // To allow this as the key in a map.
operator <(const CompilationUnitSizes & rhs) const198 bool operator<(const CompilationUnitSizes& rhs) const {
199 return std::tie(dwarf64_, address_size_) <
200 std::tie(rhs.dwarf64_, rhs.address_size_);
201 }
202
203 // Reads a DWARF offset based on whether we are reading dwarf32 or dwarf64
204 // format.
ReadDWARFOffset(string_view * data) const205 uint64_t ReadDWARFOffset(string_view* data) const {
206 if (dwarf64_) {
207 return ReadMemcpy<uint64_t>(data);
208 } else {
209 return ReadMemcpy<uint32_t>(data);
210 }
211 }
212
213 // Reads an address according to the expected address_size.
ReadAddress(string_view * data) const214 uint64_t ReadAddress(string_view* data) const {
215 if (address_size_ == 8) {
216 return ReadMemcpy<uint64_t>(data);
217 } else if (address_size_ == 4) {
218 return ReadMemcpy<uint32_t>(data);
219 } else {
220 BLOATY_UNREACHABLE();
221 }
222 }
223
224 // Reads an "initial length" as specified in many DWARF headers. This
225 // contains either a 32-bit or a 64-bit length, and signals whether we are
226 // using the 32-bit or 64-bit DWARF format (so it sets dwarf64 appropriately).
227 //
228 // Returns the range for this section and stores the remaining data
229 // in |remaining|.
ReadInitialLength(string_view * remaining)230 string_view ReadInitialLength(string_view* remaining) {
231 uint64_t len = ReadMemcpy<uint32_t>(remaining);
232
233 if (len == 0xffffffff) {
234 dwarf64_ = true;
235 len = ReadMemcpy<uint64_t>(remaining);
236 } else {
237 dwarf64_ = false;
238 }
239
240 if (remaining->size() < len) {
241 THROW("short DWARF compilation unit");
242 }
243
244 string_view unit = *remaining;
245 unit.remove_suffix(remaining->size() - len);
246 *remaining = remaining->substr(len);
247 return unit;
248 }
249
ReadDWARFVersion(string_view * data)250 void ReadDWARFVersion(string_view* data) {
251 dwarf_version_ = ReadMemcpy<uint16_t>(data);
252 }
253
254 private:
255 uint16_t dwarf_version_;
256 bool dwarf64_;
257 uint8_t address_size_;
258 };
259
260
261 // AbbrevTable /////////////////////////////////////////////////////////////////
262
263 // Parses and stores a representation of (a portion of) the .debug_abbrev
264 // section of a DWARF file. An abbreviation is defined by a unique "code"
265 // (unique within one table), and defines the DIE tag and set of attributes.
266 // The encoding of the DIE then contains just the abbreviation code and the
267 // attribute values -- thanks to the abbreviation table, the tag and attribute
268 // keys/names are not required.
269 //
270 // The abbreviations are an internal detail of the DWARF format and users should
271 // not need to care about them.
272
273 class AbbrevTable {
274 public:
275 // Reads abbreviations until a terminating abbreviation is seen.
276 string_view ReadAbbrevs(string_view data);
277
278 // In a DWARF abbreviation, each attribute has a name and a form.
279 struct Attribute {
280 uint16_t name;
281 uint8_t form;
282 };
283
284 // The representation of a single abbreviation.
285 struct Abbrev {
286 uint32_t code;
287 uint16_t tag;
288 bool has_child;
289 std::vector<Attribute> attr;
290 };
291
IsEmpty() const292 bool IsEmpty() const { return abbrev_.empty(); }
293
294 // Looks for an abbreviation with the given code. Returns true if the lookup
295 // succeeded.
GetAbbrev(uint32_t code,const Abbrev ** abbrev) const296 bool GetAbbrev(uint32_t code, const Abbrev** abbrev) const {
297 auto it = abbrev_.find(code);
298 if (it != abbrev_.end()) {
299 *abbrev = &it->second;
300 return true;
301 } else {
302 return false;
303 }
304 }
305
306 private:
307 // Keyed by abbreviation code.
308 // Generally we expect these to be small, so we could almost use a vector<>.
309 // But you never know what crazy input data is going to do...
310 std::unordered_map<uint32_t, Abbrev> abbrev_;
311 };
312
ReadAbbrevs(string_view data)313 string_view AbbrevTable::ReadAbbrevs(string_view data) {
314 while (true) {
315 uint32_t code = ReadLEB128<uint32_t>(&data);
316
317 if (code == 0) {
318 return data; // Terminator entry.
319 }
320
321 Abbrev& abbrev = abbrev_[code];
322
323 if (abbrev.code) {
324 THROW("DWARF data contained duplicate abbrev code");
325 }
326
327 uint8_t has_child;
328
329 abbrev.code = code;
330 abbrev.tag = ReadLEB128<uint16_t>(&data);
331 has_child = ReadMemcpy<uint8_t>(&data);
332
333 switch (has_child) {
334 case DW_children_yes:
335 abbrev.has_child = true;
336 break;
337 case DW_children_no:
338 abbrev.has_child = false;
339 break;
340 default:
341 THROW("DWARF has_child is neither true nor false.");
342 }
343
344 while (true) {
345 Attribute attr;
346 attr.name = ReadLEB128<uint16_t>(&data);
347 attr.form = ReadLEB128<uint8_t>(&data);
348
349 if (attr.name == 0 && attr.form == 0) {
350 break; // End of this abbrev
351 }
352
353 abbrev.attr.push_back(attr);
354 }
355 }
356 }
357
358
359 // StringTable /////////////////////////////////////////////////////////////////
360
361 // Represents the .debug_str portion of a DWARF file and contains code for
362 // reading strings out of it. This is an internal detail of the DWARF format
363 // and users should not need to care about it.
364
class StringTable {
 public:
  // Construct with the debug_str data from a DWARF file.  Only the view is
  // stored; the bytes it refers to are not copied.
  StringTable(string_view debug_str) : debug_str_(debug_str) {}

  // Read the NUL-terminated string that starts |ofs| bytes into the table.
  // Throws if |ofs| lies beyond the table or no terminator follows it.
  string_view ReadEntry(size_t ofs) const;

 private:
  // The raw string-table bytes given to the constructor.
  string_view debug_str_;
};
376
ReadEntry(size_t ofs) const377 string_view StringTable::ReadEntry(size_t ofs) const {
378 string_view str = debug_str_;
379 SkipBytes(ofs, &str);
380 return ReadNullTerminated(&str);
381 }
382
383
384 // AddressRanges ///////////////////////////////////////////////////////////////
385
386 // Code for reading address ranges out of .debug_aranges.
387
388 class AddressRanges {
389 public:
AddressRanges(string_view data)390 AddressRanges(string_view data) : section_(data), next_unit_(data) {}
391
392 // Offset into .debug_info for the current compilation unit.
debug_info_offset()393 uint64_t debug_info_offset() { return debug_info_offset_; }
394
395 // Address and length for this range.
address()396 uint64_t address() { return address_; }
length()397 uint64_t length() { return length_; }
398
399 // Advance to the next range. The values will be available in address() and
400 // length(). Returns false when the end of this compilation unit is hit.
401 // Must call this once before reading the first range.
402 bool NextRange();
403
404 // Advance to the next compilation unit. The unit offset will be available in
405 // debug_info_offset(). Must call this once before reading the first unit.
406 bool NextUnit();
407
408 private:
409 CompilationUnitSizes sizes_;
410 string_view section_;
411 string_view unit_remaining_;
412 string_view next_unit_;
413 uint64_t debug_info_offset_;
414 uint64_t address_;
415 uint64_t length_;
416 };
417
NextRange()418 bool AddressRanges::NextRange() {
419 if (unit_remaining_.empty()) {
420 return false;
421 }
422
423 address_ = sizes_.ReadAddress(&unit_remaining_);
424 length_ = sizes_.ReadAddress(&unit_remaining_);
425 return true;
426 }
427
NextUnit()428 bool AddressRanges::NextUnit() {
429 if (next_unit_.empty()) {
430 return false;
431 }
432
433 unit_remaining_ = sizes_.ReadInitialLength(&next_unit_);
434 sizes_.ReadDWARFVersion(&unit_remaining_);
435
436 if (sizes_.dwarf_version() > 4) {
437 THROW("DWARF data is too new for us");
438 }
439
440 debug_info_offset_ = sizes_.ReadDWARFOffset(&unit_remaining_);
441
442 uint8_t segment_size;
443
444 sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&unit_remaining_));
445 segment_size = ReadMemcpy<uint8_t>(&unit_remaining_);
446
447 if (segment_size) {
448 THROW("we don't know how to handle segmented addresses.");
449 }
450
451 size_t ofs = unit_remaining_.data() - section_.data();
452 size_t aligned_ofs = AlignUpTo(ofs, sizes_.address_size() * 2);
453 SkipBytes(aligned_ofs - ofs, &unit_remaining_);
454 return true;
455 }
456
457
458 // LocationList ////////////////////////////////////////////////////////////////
459
460 // Code for reading entries out of a location list.
461 // For the moment we only care about finding the bounds of a list given its
462 // offset, so we don't actually vend any of the data.
463
class LocationList {
 public:
  // |sizes| supplies the address size used to decode entries; |data| is the
  // available data, beginning at the first entry of the list.
  LocationList(CompilationUnitSizes sizes, string_view data)
      : sizes_(sizes), remaining_(data) {}

  // Pointer to the first byte that has not been consumed yet.
  const char* read_offset() const { return remaining_.data(); }

  // Consumes one entry.  Returns false once the end-of-list entry (start and
  // end both zero) has been consumed, true otherwise.
  bool NextEntry();

 private:
  CompilationUnitSizes sizes_;
  string_view remaining_;  // Unread data; shrinks as entries are consumed.
};
476
NextEntry()477 bool LocationList::NextEntry() {
478 uint64_t start, end;
479 start = sizes_.ReadAddress(&remaining_);
480 end = sizes_.ReadAddress(&remaining_);
481 if (start == 0 && end == 0) {
482 return false;
483 } else if (start == UINT64_MAX ||
484 (start == UINT32_MAX && sizes_.address_size() == 4)) {
485 // Base address selection, nothing more to do.
486 } else {
487 // Need to skip the location description.
488 uint16_t length = ReadMemcpy<uint16_t>(&remaining_);
489 SkipBytes(length, &remaining_);
490 }
491 return true;
492 }
493
GetLocationListRange(CompilationUnitSizes sizes,string_view available)494 string_view GetLocationListRange(CompilationUnitSizes sizes,
495 string_view available) {
496 LocationList list(sizes, available);
497 while (list.NextEntry()) {}
498 return available.substr(0, list.read_offset() - available.data());
499 }
500
501
502 // RangeList ///////////////////////////////////////////////////////////////////
503
504 // Code for reading entries out of a range list.
505 // For the moment we only care about finding the bounds of a list given its
506 // offset, so we don't actually vend any of the data.
507
class RangeList {
 public:
  // |sizes| supplies the address size used to decode entries; |data| is the
  // available data, beginning at the first entry of the list.
  RangeList(CompilationUnitSizes sizes, string_view data)
      : sizes_(sizes), remaining_(data) {}

  // Pointer to the first byte that has not been consumed yet.
  const char* read_offset() const { return remaining_.data(); }

  // Consumes one (start, end) pair.  Returns false once the pair with both
  // values zero has been consumed, true otherwise.
  bool NextEntry();

 private:
  CompilationUnitSizes sizes_;
  string_view remaining_;  // Unread data; shrinks as entries are consumed.
};
520
NextEntry()521 bool RangeList::NextEntry() {
522 uint64_t start, end;
523 start = sizes_.ReadAddress(&remaining_);
524 end = sizes_.ReadAddress(&remaining_);
525 if (start == 0 && end == 0) {
526 return false;
527 }
528 return true;
529 }
530
GetRangeListRange(CompilationUnitSizes sizes,string_view available)531 string_view GetRangeListRange(CompilationUnitSizes sizes,
532 string_view available) {
533 RangeList list(sizes, available);
534 while (list.NextEntry()) {
535 }
536 return available.substr(0, list.read_offset() - available.data());
537 }
538
539 // DIEReader ///////////////////////////////////////////////////////////////////
540
541 // Reads a sequence of DWARF DIE's (Debugging Information Entries) from the
542 // .debug_info or .debug_types section of a binary.
543 //
544 // Each DIE contains a tag and a set of attribute/value pairs. We rely on the
545 // abbreviations in an AbbrevTable to decode the DIEs.
546
class DIEReader {
 public:
  // Constructs a new DIEReader.  Cannot be used until you call one of the
  // Seek() methods below.  Only a reference to |file| is kept, so |file| must
  // outlive the reader.
  DIEReader(const File& file) : dwarf_(file) {}

  // Returns true if we are at the end of DIEs for this compilation unit.
  // NOTE(review): state_ has no initializer, so the result is only meaningful
  // after a Seek call.
  bool IsEof() const { return state_ == State::kEof; }

  // DIEs exist in both .debug_info and .debug_types.
  enum class Section {
    kDebugInfo,
    kDebugTypes
  };

  // Seeks to the overall start or the start of a specific compilation unit.
  // Note that |header_offset| is the offset of the compilation unit *header*,
  // not the offset of the first DIE.
  bool SeekToCompilationUnit(Section section, uint64_t header_offset);
  bool SeekToStart(Section section) {
    return SeekToCompilationUnit(section, 0);
  }

  // Parses the header of the unit that follows the current one and moves to
  // its first DIE.  Returns false when no unit data is left.
  bool NextCompilationUnit();

  // Advances to the next overall DIE, ignoring whether it happens to be a
  // child, a sibling, or an uncle/aunt.  Returns false at error or EOF.
  bool NextDIE();

  // Skips children of the current DIE, so that the next call to NextDIE()
  // will read the next sibling (or parent, if no sibling exists).
  bool SkipChildren();

  // Abbreviation describing the current DIE.  Must not be called at EOF.
  const AbbrevTable::Abbrev& GetAbbrev() const {
    assert(!IsEof());
    return *current_abbrev_;
  }

  // Returns the tag of the current DIE.
  // Requires that ReadCode() has been called at least once.
  uint16_t GetTag() const { return GetAbbrev().tag; }

  // Returns whether the current DIE has a child.
  // Requires that ReadCode() has been called at least once.
  bool HasChild() const { return GetAbbrev().has_child; }

  // The File this reader was constructed with.
  const File& dwarf() const { return dwarf_; }

  // Accessors for state describing the current compilation unit.
  // unit_range() covers the unit including its initial-length field.
  string_view unit_range() const { return unit_range_; }
  CompilationUnitSizes unit_sizes() const { return unit_sizes_; }
  uint32_t abbrev_version() const { return abbrev_version_; }
  uint64_t debug_abbrev_offset() const { return debug_abbrev_offset_; }

  // If strp_sink is set, AddIndirectString() reports each string it is given
  // by calling strp_sink->AddFileRange("dwarf_strp", <compile unit name>,
  // <string range>).  ReadIndirectString() calls AddIndirectString() for every
  // string it reads from .debug_str (DW_FORM_strp attributes).
  void set_compileunit_name(absl::string_view name) {
    unit_name_ = std::string(name);
  }
  void set_strp_sink(RangeSink* sink) { strp_sink_ = sink; }

  // Reports |range| to the strp sink, if one has been set; otherwise a no-op.
  void AddIndirectString(string_view range) const {
    if (strp_sink_) {
      strp_sink_->AddFileRange("dwarf_strp", unit_name_, range);
    }
  }

 private:
  BLOATY_DISALLOW_COPY_AND_ASSIGN(DIEReader);

  template<typename> friend class AttrReader;

  // APIs for our friends to use to update our state.

  // Call to get the current read head where attributes should be parsed.
  string_view ReadAttributesBegin() {
    assert(state_ == State::kReadyToReadAttributes);
    return remaining_;
  }

  // When some data has been parsed, this updates our read head.
  // Throws if |remaining| has a null data pointer; otherwise stores
  // |remaining| and |sibling|, moves to kReadyToNext and returns true.
  bool ReadAttributesEnd(string_view remaining, uint64_t sibling) {
    assert(state_ == State::kReadyToReadAttributes);
    if (remaining.data() == nullptr) {
      THROW("premature EOF reading DWARF attributes");
    } else {
      remaining_ = remaining;
      sibling_offset_ = sibling;
      state_ = State::kReadyToNext;
      return true;
    }
  }

  // Internal APIs.

  bool ReadCompilationUnitHeader();
  bool ReadCode();

  // Where the reader is within the current DIE:
  //   kReadyToReadAttributes -- the abbreviation code has been read and the
  //                             attributes are next.
  //   kReadyToNext           -- the attributes have been consumed.
  //   kEof                   -- no more data.
  enum class State {
    kReadyToReadAttributes,
    kReadyToNext,
    kEof,
  } state_;

  std::string error_;

  const File& dwarf_;
  RangeSink* strp_sink_ = nullptr;

  // Abbreviation for the current entry.
  const AbbrevTable::Abbrev* current_abbrev_;

  // Our current read position.
  string_view remaining_;
  uint64_t sibling_offset_;
  int depth_ = 0;

  // Data for the next compilation unit.
  string_view next_unit_;

  // All of the AbbrevTables we've read from .debug_abbrev, indexed by their
  // offset within .debug_abbrev.
  std::unordered_map<uint64_t, AbbrevTable> abbrev_tables_;

  // Whether we are in .debug_types or .debug_info.
  Section section_;

  // Information about the current compilation unit.
  uint64_t debug_abbrev_offset_;
  std::string unit_name_;
  string_view unit_range_;
  CompilationUnitSizes unit_sizes_;
  AbbrevTable* unit_abbrev_;

  // A small integer that uniquely identifies the combination of unit_abbrev_
  // and unit_sizes_.  Attribute readers use this to know when they can reuse an
  // existing (abbrev code) -> (Actions) mapping, since this table depends on
  // both the current abbrev. table and the sizes.
  uint32_t abbrev_version_;

  // Maps each (abbrev table, sizes) combination seen so far to the version
  // number assigned to it.
  // NOTE(review): std::map and std::string are used here, but <map> and
  // <string> are not among this file's direct includes -- presumably they
  // arrive through another header; confirm.
  std::map<std::pair<AbbrevTable*, CompilationUnitSizes>, uint32_t>
      abbrev_versions_;

  // Only for .debug_types
  uint64_t unit_type_signature_;
  uint64_t unit_type_offset_;
};
695
ReadCode()696 bool DIEReader::ReadCode() {
697 uint32_t code;
698 again:
699 if (remaining_.empty()) {
700 state_ = State::kEof;
701 return false;
702 }
703 code = ReadLEB128<uint32_t>(&remaining_);
704 if (code == 0) {
705 // null entry terminates a chain of sibling entries.
706 depth_--;
707 goto again;
708 }
709
710 if (!unit_abbrev_->GetAbbrev(code, ¤t_abbrev_)) {
711 THROW("couldn't find abbreviation for code");
712 }
713 state_ = State::kReadyToReadAttributes;
714 sibling_offset_ = 0;
715
716 if (HasChild()) {
717 depth_++;
718 }
719
720 return true;
721 }
722
// Moves to the next compilation unit by parsing the header at the front of
// next_unit_.  Returns whatever ReadCompilationUnitHeader() returns (false
// when no unit data is left).
bool DIEReader::NextCompilationUnit() {
  return ReadCompilationUnitHeader();
}
726
NextDIE()727 bool DIEReader::NextDIE() {
728 if (state_ == State::kEof) {
729 return false;
730 }
731
732 assert(state_ == State::kReadyToNext);
733 return ReadCode();
734 }
735
SeekToCompilationUnit(Section section,uint64_t offset)736 bool DIEReader::SeekToCompilationUnit(Section section, uint64_t offset) {
737 section_ = section;
738
739 if (section == Section::kDebugInfo) {
740 next_unit_ = dwarf_.debug_info;
741 } else {
742 next_unit_ = dwarf_.debug_types;
743 }
744
745 SkipBytes(offset, &next_unit_);
746 return ReadCompilationUnitHeader();
747 }
748
// Parses the unit header at the front of next_unit_ and positions the reader
// on that unit's first DIE.  Returns false (state kEof) when no unit data is
// left; otherwise returns the result of ReadCode().  Throws on truncated
// data, a DWARF version above 4, or an address size other than 4 or 8.
bool DIEReader::ReadCompilationUnitHeader() {
  if (next_unit_.empty()) {
    state_ = State::kEof;
    return false;
  }

  // Remember where the unit starts so that unit_range_ can include the
  // initial-length field, which ReadInitialLength() consumes.
  unit_range_ = next_unit_;
  remaining_ = unit_sizes_.ReadInitialLength(&next_unit_);
  // Trim unit_range_ so it ends where the unit's contents end.
  unit_range_ = unit_range_.substr(
      0, remaining_.size() + (remaining_.data() - unit_range_.data()));

  unit_sizes_.ReadDWARFVersion(&remaining_);

  if (unit_sizes_.dwarf_version() > 4) {
    THROW("Data is in new DWARF format we don't understand");
  }

  debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
  // operator[] creates an empty table the first time an offset is seen.
  unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset_];

  // If we haven't already read abbreviations for this debug_abbrev_offset_, we
  // need to do so now.
  if (unit_abbrev_->IsEmpty()) {
    string_view abbrev_data = dwarf_.debug_abbrev;
    SkipBytes(debug_abbrev_offset_, &abbrev_data);
    unit_abbrev_->ReadAbbrevs(abbrev_data);
  }

  unit_sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&remaining_));

  // Units in .debug_types carry two extra header fields.
  if (section_ == Section::kDebugTypes) {
    unit_type_signature_ = ReadMemcpy<uint64_t>(&remaining_);
    unit_type_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
  }

  // Assign (or look up) the version number for this (table, sizes) pair.  A
  // new pair gets the current map size as its number.
  auto abbrev_id = std::make_pair(unit_abbrev_, unit_sizes_);
  auto insert_pair = abbrev_versions_.insert(
      std::make_pair(abbrev_id, abbrev_versions_.size()));

  // This will be either the newly inserted value or the existing one, if there
  // was one.
  abbrev_version_ = insert_pair.first->second;

  return ReadCode();
}
794
795
796 // DWARF form parsing //////////////////////////////////////////////////////////
797
798 class AttrValue {
799 public:
AttrValue(uint64_t val)800 AttrValue(uint64_t val) : uint_(val), type_(Type::kUint) {}
AttrValue(string_view val)801 AttrValue(string_view val) : string_(val), type_(Type::kString) {}
802
803 enum class Type {
804 kUint,
805 kString
806 };
807
type() const808 Type type() const { return type_; }
IsUint() const809 bool IsUint() const { return type_ == Type::kUint; }
IsString() const810 bool IsString() const { return type_ == Type::kString; }
811
ToUint() const812 absl::optional<uint64_t> ToUint() const {
813 if (IsUint()) return uint_;
814 string_view str = string_;
815 switch (str.size()) {
816 case 1:
817 return ReadMemcpy<uint8_t>(&str);
818 case 2:
819 return ReadMemcpy<uint8_t>(&str);
820 case 4:
821 return ReadMemcpy<uint32_t>(&str);
822 case 8:
823 return ReadMemcpy<uint64_t>(&str);
824 }
825 return absl::nullopt;
826 }
827
GetUint() const828 uint64_t GetUint() const {
829 assert(type_ == Type::kUint);
830 return uint_;
831 }
832
GetString() const833 string_view GetString() const {
834 assert(type_ == Type::kString);
835 return string_;
836 }
837
838 private:
839 union {
840 uint64_t uint_;
841 string_view string_;
842 };
843
844 Type type_;
845 };
846
847 template <class D>
ReadBlock(string_view * data)848 string_view ReadBlock(string_view* data) {
849 D len = ReadMemcpy<D>(data);
850 return ReadPiece(len, data);
851 }
852
ReadVariableBlock(string_view * data)853 string_view ReadVariableBlock(string_view* data) {
854 uint64_t len = ReadLEB128<uint64_t>(data);
855 return ReadPiece(len, data);
856 }
857
858 template <class D>
ReadIndirectString(const DIEReader & reader,string_view * data)859 string_view ReadIndirectString(const DIEReader& reader, string_view* data) {
860 D ofs = ReadMemcpy<D>(data);
861 StringTable table(reader.dwarf().debug_str);
862 string_view ret = table.ReadEntry(ofs);
863 reader.AddIndirectString(ret);
864 return ret;
865 }
866
ParseAttr(const DIEReader & reader,uint8_t form,string_view * data)867 AttrValue ParseAttr(const DIEReader& reader, uint8_t form, string_view* data) {
868 switch (form) {
869 case DW_FORM_indirect: {
870 uint16_t indirect_form = ReadLEB128<uint16_t>(data);
871 if (indirect_form == DW_FORM_indirect) {
872 THROW("indirect attribute has indirect form type");
873 }
874 return ParseAttr(reader, indirect_form, data);
875 }
876 case DW_FORM_ref1:
877 return AttrValue(ReadMemcpy<uint8_t>(data));
878 case DW_FORM_ref2:
879 return AttrValue(ReadMemcpy<uint16_t>(data));
880 case DW_FORM_ref4:
881 return AttrValue(ReadMemcpy<uint32_t>(data));
882 case DW_FORM_ref_sig8:
883 case DW_FORM_ref8:
884 return AttrValue(ReadMemcpy<uint64_t>(data));
885 case DW_FORM_ref_udata:
886 return AttrValue(ReadLEB128<uint64_t>(data));
887 case DW_FORM_addr:
888 address_size:
889 switch (reader.unit_sizes().address_size()) {
890 case 4:
891 return AttrValue(ReadMemcpy<uint32_t>(data));
892 case 8:
893 return AttrValue(ReadMemcpy<uint64_t>(data));
894 default:
895 BLOATY_UNREACHABLE();
896 }
897 case DW_FORM_ref_addr:
898 if (reader.unit_sizes().dwarf_version() <= 2) {
899 goto address_size;
900 }
901 ABSL_FALLTHROUGH_INTENDED;
902 case DW_FORM_sec_offset:
903 if (reader.unit_sizes().dwarf64()) {
904 return AttrValue(ReadMemcpy<uint64_t>(data));
905 } else {
906 return AttrValue(ReadMemcpy<uint32_t>(data));
907 }
908 case DW_FORM_udata:
909 return AttrValue(ReadLEB128<uint64_t>(data));
910 case DW_FORM_block1:
911 return AttrValue(ReadBlock<uint8_t>(data));
912 case DW_FORM_block2:
913 return AttrValue(ReadBlock<uint16_t>(data));
914 case DW_FORM_block4:
915 return AttrValue(ReadBlock<uint32_t>(data));
916 case DW_FORM_block:
917 case DW_FORM_exprloc:
918 return AttrValue(ReadVariableBlock(data));
919 case DW_FORM_string:
920 return AttrValue(ReadNullTerminated(data));
921 case DW_FORM_strp:
922 if (reader.unit_sizes().dwarf64()) {
923 return AttrValue(ReadIndirectString<uint64_t>(reader, data));
924 } else {
925 return AttrValue(ReadIndirectString<uint32_t>(reader, data));
926 }
927 case DW_FORM_data1:
928 return AttrValue(ReadPiece(1, data));
929 case DW_FORM_data2:
930 return AttrValue(ReadPiece(2, data));
931 case DW_FORM_data4:
932 return AttrValue(ReadPiece(4, data));
933 case DW_FORM_data8:
934 return AttrValue(ReadPiece(8, data));
935
936 // Bloaty doesn't currently care about any bool or signed data.
937 // So we fudge it a bit and just stuff these in a uint64.
938 case DW_FORM_flag_present:
939 return AttrValue(1);
940 case DW_FORM_flag:
941 return AttrValue(ReadMemcpy<uint8_t>(data));
942 case DW_FORM_sdata:
943 return AttrValue(ReadLEB128<uint64_t>(data));
944 default:
945 THROWF("Don't know how to parse DWARF form: $0", form);
946 }
947 }
948
949
950 // AttrReader //////////////////////////////////////////////////////////////////
951
952 // Parses a DIE's attributes, calling user callbacks with the parsed values.
953
954 template <class T>
955 class AttrReader {
956 public:
957 typedef void CallbackFunc(T* container, AttrValue val);
958
OnAttribute(DwarfAttribute attr,CallbackFunc * func)959 void OnAttribute(DwarfAttribute attr, CallbackFunc* func) {
960 attributes_[attr] = func;
961 }
962
963 // Reads all attributes for this DIE, storing the ones we were expecting.
ReadAttributes(DIEReader * reader,T * container)964 void ReadAttributes(DIEReader* reader, T* container) {
965 string_view data = reader->ReadAttributesBegin();
966 const AbbrevTable::Abbrev& abbrev = reader->GetAbbrev();
967
968 for (auto attr : abbrev.attr) {
969 AttrValue value = ParseAttr(*reader, attr.form, &data);
970 auto it = attributes_.find(attr.name);
971 if (it != attributes_.end()) {
972 it->second(container, value);
973 }
974 }
975
976 reader->ReadAttributesEnd(data, 0);
977 }
978
979 private:
980 std::unordered_map<int, CallbackFunc*> attributes_;
981 };
982
983 // From DIEReader, defined here because it depends on FixedAttrReader.
SkipChildren()984 bool DIEReader::SkipChildren() {
985 assert(state_ == State::kReadyToNext);
986 if (!HasChild()) {
987 return true;
988 }
989
990 int target_depth = depth_ - 1;
991 dwarf::AttrReader<void> attr_reader;
992 while (depth_ > target_depth) {
993 // TODO(haberman): use DW_AT_sibling to optimize skipping when it is
994 // available.
995 if (!NextDIE()) {
996 return false;
997 }
998 attr_reader.ReadAttributes(this, nullptr);
999 }
1000 return true;
1001 }
1002
1003 // LineInfoReader //////////////////////////////////////////////////////////////
1004
// Code to read the line number programs in the .debug_line section of a DWARF
// file.
1006
1007 class LineInfoReader {
1008 public:
LineInfoReader(const File & file)1009 LineInfoReader(const File& file) : file_(file), info_(0) {}
1010
1011 struct LineInfo {
LineInfobloaty::dwarf::LineInfoReader::LineInfo1012 LineInfo(bool default_is_stmt) : is_stmt(default_is_stmt) {}
1013 uint64_t address = 0;
1014 uint32_t file = 1;
1015 uint32_t line = 1;
1016 uint32_t column = 0;
1017 uint32_t discriminator = 0;
1018 bool end_sequence = false;
1019 bool basic_block = false;
1020 bool prologue_end = false;
1021 bool epilogue_begin = false;
1022 bool is_stmt;
1023 uint8_t op_index = 0;
1024 uint8_t isa = 0;
1025 };
1026
1027 struct FileName {
1028 string_view name;
1029 uint32_t directory_index;
1030 uint64_t modified_time;
1031 uint64_t file_size;
1032 };
1033
1034 void SeekToOffset(uint64_t offset, uint8_t address_size);
1035 bool ReadLineInfo();
lineinfo() const1036 const LineInfo& lineinfo() const { return info_; }
filename(size_t i) const1037 const FileName& filename(size_t i) const { return filenames_[i]; }
include_directory(size_t i) const1038 string_view include_directory(size_t i) const {
1039 return include_directories_[i];
1040 }
1041
GetExpandedFilename(size_t index)1042 const std::string& GetExpandedFilename(size_t index) {
1043 if (index >= filenames_.size()) {
1044 THROW("filename index out of range");
1045 }
1046
1047 // Generate these lazily.
1048 if (expanded_filenames_.size() <= index) {
1049 expanded_filenames_.resize(filenames_.size());
1050 }
1051
1052 std::string& ret = expanded_filenames_[index];
1053 if (ret.empty()) {
1054 const FileName& filename = filenames_[index];
1055 string_view directory = include_directories_[filename.directory_index];
1056 ret = std::string(directory);
1057 if (!ret.empty()) {
1058 ret += "/";
1059 }
1060 ret += std::string(filename.name);
1061 }
1062 return ret;
1063 }
1064
1065 private:
1066 struct Params {
1067 uint8_t minimum_instruction_length;
1068 uint8_t maximum_operations_per_instruction;
1069 uint8_t default_is_stmt;
1070 int8_t line_base;
1071 uint8_t line_range;
1072 uint8_t opcode_base;
1073 } params_;
1074
1075 const File& file_;
1076
1077 CompilationUnitSizes sizes_;
1078 std::vector<string_view> include_directories_;
1079 std::vector<FileName> filenames_;
1080 std::vector<uint8_t> standard_opcode_lengths_;
1081 std::vector<std::string> expanded_filenames_;
1082
1083 string_view remaining_;
1084
1085 // Whether we are in a "shadow" part of the bytecode program. Sometimes
1086 // parts of the line info program make it into the final binary even though
1087 // the corresponding code was stripped. We can tell when this happened by
1088 // looking for DW_LNE_set_address ops where the operand is 0. This
1089 // indicates that a relocation for that argument never got applied, which
1090 // probably means that the code got stripped.
1091 //
1092 // While this is true, we don't yield any LineInfo entries, because the
1093 // "address" value is garbage.
1094 bool shadow_;
1095
1096 LineInfo info_;
1097
DoAdvance(uint64_t advance,uint8_t max_per_instr)1098 void DoAdvance(uint64_t advance, uint8_t max_per_instr) {
1099 info_.address += params_.minimum_instruction_length *
1100 ((info_.op_index + advance) / max_per_instr);
1101 info_.op_index = (info_.op_index + advance) % max_per_instr;
1102 }
1103
Advance(uint64_t amount)1104 void Advance(uint64_t amount) {
1105 if (params_.maximum_operations_per_instruction == 1) {
1106 // This is by far the common case (only false on VLIW architectuers),
1107 // and this inlining/specialization avoids a costly division.
1108 DoAdvance(amount, 1);
1109 } else {
1110 DoAdvance(amount, params_.maximum_operations_per_instruction);
1111 }
1112 }
1113
AdjustedOpcode(uint8_t op)1114 uint8_t AdjustedOpcode(uint8_t op) { return op - params_.opcode_base; }
1115
SpecialOpcodeAdvance(uint8_t op)1116 void SpecialOpcodeAdvance(uint8_t op) {
1117 Advance(AdjustedOpcode(op) / params_.line_range);
1118 }
1119 };
1120
SeekToOffset(uint64_t offset,uint8_t address_size)1121 void LineInfoReader::SeekToOffset(uint64_t offset, uint8_t address_size) {
1122 string_view data = file_.debug_line;
1123 SkipBytes(offset, &data);
1124
1125 sizes_.SetAddressSize(address_size);
1126 data = sizes_.ReadInitialLength(&data);
1127 sizes_.ReadDWARFVersion(&data);
1128 uint64_t header_length = sizes_.ReadDWARFOffset(&data);
1129 string_view program = data;
1130 SkipBytes(header_length, &program);
1131
1132 params_.minimum_instruction_length = ReadMemcpy<uint8_t>(&data);
1133 if (sizes_.dwarf_version() == 4) {
1134 params_.maximum_operations_per_instruction = ReadMemcpy<uint8_t>(&data);
1135
1136 if (params_.maximum_operations_per_instruction == 0) {
1137 THROW("DWARF line info had maximum_operations_per_instruction=0");
1138 }
1139 } else {
1140 params_.maximum_operations_per_instruction = 1;
1141 }
1142 params_.default_is_stmt = ReadMemcpy<uint8_t>(&data);
1143 params_.line_base = ReadMemcpy<int8_t>(&data);
1144 params_.line_range = ReadMemcpy<uint8_t>(&data);
1145 params_.opcode_base = ReadMemcpy<uint8_t>(&data);
1146 if (params_.line_range == 0) {
1147 THROW("line_range of zero will cause divide by zero");
1148 }
1149
1150 standard_opcode_lengths_.resize(params_.opcode_base);
1151 for (size_t i = 1; i < params_.opcode_base; i++) {
1152 standard_opcode_lengths_[i] = ReadMemcpy<uint8_t>(&data);
1153 }
1154
1155 // Read include_directories.
1156 include_directories_.clear();
1157
1158 // Implicit current directory entry.
1159 include_directories_.push_back(string_view());
1160
1161 while (true) {
1162 string_view dir = ReadNullTerminated(&data);
1163 if (dir.empty()) {
1164 break;
1165 }
1166 include_directories_.push_back(dir);
1167 }
1168
1169 // Read file_names.
1170 filenames_.clear();
1171 expanded_filenames_.clear();
1172
1173 // Filename 0 is unused.
1174 filenames_.push_back(FileName());
1175 while (true) {
1176 FileName file_name;
1177 file_name.name = ReadNullTerminated(&data);
1178 if (file_name.name.empty()) {
1179 break;
1180 }
1181 file_name.directory_index = ReadLEB128<uint32_t>(&data);
1182 file_name.modified_time = ReadLEB128<uint64_t>(&data);
1183 file_name.file_size = ReadLEB128<uint64_t>(&data);
1184 if (file_name.directory_index >= include_directories_.size()) {
1185 THROW("directory index out of range");
1186 }
1187 filenames_.push_back(file_name);
1188 }
1189
1190 info_ = LineInfo(params_.default_is_stmt);
1191 remaining_ = program;
1192 shadow_ = false;
1193 }
1194
ReadLineInfo()1195 bool LineInfoReader::ReadLineInfo() {
1196 // Final step of last DW_LNS_copy / special opcode.
1197 info_.discriminator = 0;
1198 info_.basic_block = false;
1199 info_.prologue_end = false;
1200 info_.epilogue_begin = false;
1201
1202 // Final step of DW_LNE_end_sequence.
1203 info_.end_sequence = false;
1204
1205 string_view data = remaining_;
1206
1207 while (true) {
1208 if (data.empty()) {
1209 remaining_ = data;
1210 return false;
1211 }
1212
1213 uint8_t op = ReadMemcpy<uint8_t>(&data);
1214
1215 if (op >= params_.opcode_base) {
1216 SpecialOpcodeAdvance(op);
1217 info_.line +=
1218 params_.line_base + (AdjustedOpcode(op) % params_.line_range);
1219 if (!shadow_) {
1220 remaining_ = data;
1221 return true;
1222 }
1223 } else {
1224 switch (op) {
1225 case DW_LNS_extended_op: {
1226 uint16_t len = ReadLEB128<uint16_t>(&data);
1227 uint8_t extended_op = ReadMemcpy<uint8_t>(&data);
1228 switch (extended_op) {
1229 case DW_LNE_end_sequence: {
1230 // Preserve address and set end_sequence, but reset everything
1231 // else.
1232 uint64_t addr = info_.address;
1233 info_ = LineInfo(params_.default_is_stmt);
1234 info_.address = addr;
1235 info_.end_sequence = true;
1236 if (!shadow_) {
1237 remaining_ = data;
1238 return true;
1239 }
1240 break;
1241 }
1242 case DW_LNE_set_address:
1243 info_.address = sizes_.ReadAddress(&data);
1244 info_.op_index = 0;
1245 shadow_ = (info_.address == 0);
1246 break;
1247 case DW_LNE_define_file: {
1248 FileName file_name;
1249 file_name.name = ReadNullTerminated(&data);
1250 file_name.directory_index = ReadLEB128<uint32_t>(&data);
1251 file_name.modified_time = ReadLEB128<uint64_t>(&data);
1252 file_name.file_size = ReadLEB128<uint64_t>(&data);
1253 if (file_name.directory_index >= include_directories_.size()) {
1254 THROW("directory index out of range");
1255 }
1256 filenames_.push_back(file_name);
1257 break;
1258 }
1259 case DW_LNE_set_discriminator:
1260 info_.discriminator = ReadLEB128<uint32_t>(&data);
1261 break;
1262 default:
1263 // We don't understand this opcode, skip it.
1264 SkipBytes(len, &data);
1265 if (verbose_level > 0) {
1266 fprintf(stderr,
1267 "bloaty: warning: unknown DWARF line table extended "
1268 "opcode: %d\n",
1269 extended_op);
1270 }
1271 break;
1272 }
1273 break;
1274 }
1275 case DW_LNS_copy:
1276 if (!shadow_) {
1277 remaining_ = data;
1278 return true;
1279 }
1280 break;
1281 case DW_LNS_advance_pc:
1282 Advance(ReadLEB128<uint64_t>(&data));
1283 break;
1284 case DW_LNS_advance_line:
1285 info_.line += ReadLEB128<int32_t>(&data);
1286 break;
1287 case DW_LNS_set_file:
1288 info_.file = ReadLEB128<uint32_t>(&data);
1289 if (info_.file >= filenames_.size()) {
1290 THROW("filename index too big");
1291 }
1292 break;
1293 case DW_LNS_set_column:
1294 info_.column = ReadLEB128<uint32_t>(&data);
1295 break;
1296 case DW_LNS_negate_stmt:
1297 info_.is_stmt = !info_.is_stmt;
1298 break;
1299 case DW_LNS_set_basic_block:
1300 info_.basic_block = true;
1301 break;
1302 case DW_LNS_const_add_pc:
1303 SpecialOpcodeAdvance(255);
1304 break;
1305 case DW_LNS_fixed_advance_pc:
1306 info_.address += ReadMemcpy<uint16_t>(&data);
1307 info_.op_index = 0;
1308 break;
1309 case DW_LNS_set_prologue_end:
1310 info_.prologue_end = true;
1311 break;
1312 case DW_LNS_set_epilogue_begin:
1313 info_.epilogue_begin = true;
1314 break;
1315 case DW_LNS_set_isa:
1316 info_.isa = ReadLEB128<uint8_t>(&data);
1317 break;
1318 default:
1319 // Unknown opcode, but we know its length so can skip it.
1320 SkipBytes(standard_opcode_lengths_[op], &data);
1321 if (verbose_level > 0) {
1322 fprintf(stderr,
1323 "bloaty: warning: unknown DWARF line table opcode: %d\n",
1324 op);
1325 }
1326 break;
1327 }
1328 }
1329 }
1330 }
1331
1332 } // namespace dwarf
1333
1334 // Bloaty DWARF Data Sources ///////////////////////////////////////////////////
1335
1336 // The DWARF .debug_aranges section should, in theory, give us exactly the
1337 // information we need to map file ranges in linked binaries to compilation
1338 // units from where that code came. However, .debug_aranges is often incomplete
1339 // or missing completely, so we use it as just one of several data sources for
1340 // the "compileunits" data source.
ReadDWARFAddressRanges(const dwarf::File & file,RangeSink * sink)1341 static bool ReadDWARFAddressRanges(const dwarf::File& file, RangeSink* sink) {
1342 // Maps compilation unit offset -> source filename
1343 // Lazily initialized.
1344 class FilenameMap {
1345 public:
1346 FilenameMap(const dwarf::File& file)
1347 : die_reader_(file),
1348 missing_("[DWARF is missing filename]") {
1349 attr_reader_.OnAttribute(
1350 DW_AT_name, [](string_view* s, dwarf::AttrValue data) {
1351 if (!data.IsString()) return;
1352 *s = data.GetString();
1353 });
1354 }
1355
1356 std::string GetFilename(uint64_t compilation_unit_offset) {
1357 auto& name = map_[compilation_unit_offset];
1358 if (name.empty()) {
1359 name = LookupFilename(compilation_unit_offset);
1360 }
1361 return name;
1362 }
1363
1364 private:
1365 std::string LookupFilename(uint64_t compilation_unit_offset) {
1366 auto section = dwarf::DIEReader::Section::kDebugInfo;
1367 string_view name;
1368 if (die_reader_.SeekToCompilationUnit(section, compilation_unit_offset) &&
1369 die_reader_.GetTag() == DW_TAG_compile_unit &&
1370 (attr_reader_.ReadAttributes(&die_reader_, &name),
1371 !name.empty())) {
1372 return std::string(name);
1373 } else {
1374 return missing_;
1375 }
1376 }
1377
1378 dwarf::DIEReader die_reader_;
1379 dwarf::AttrReader<string_view> attr_reader_;
1380 std::unordered_map<uint64_t, std::string> map_;
1381 std::string missing_;
1382 } map(file);
1383
1384 dwarf::AddressRanges ranges(file.debug_aranges);
1385
1386 while (ranges.NextUnit()) {
1387 std::string filename = map.GetFilename(ranges.debug_info_offset());
1388
1389 while (ranges.NextRange()) {
1390 if (ranges.address() != 0) {
1391 sink->AddVMRangeIgnoreDuplicate("dwarf_aranges", ranges.address(),
1392 ranges.length(), filename);
1393 }
1394 }
1395 }
1396
1397 return true;
1398 }
1399
1400 // TODO(haberman): make these into real protobufs once proto supports
1401 // string_view.
1402 class GeneralDIE {
1403 public:
has_name() const1404 bool has_name() const { return has_name_; }
has_linkage_name() const1405 bool has_linkage_name() const { return has_linkage_name_; }
has_location_string() const1406 bool has_location_string() const { return has_location_string_; }
has_low_pc() const1407 bool has_low_pc() const { return has_low_pc_; }
has_high_pc() const1408 bool has_high_pc() const { return has_high_pc_; }
has_location_uint64() const1409 bool has_location_uint64() const { return has_location_uint64_; }
has_stmt_list() const1410 bool has_stmt_list() const { return has_stmt_list_; }
has_ranges() const1411 bool has_ranges() const { return has_ranges_; }
has_start_scope() const1412 bool has_start_scope() const { return has_start_scope_; }
1413
DebugString()1414 std::string DebugString() {
1415 std::string ret;
1416 if (has_name()) {
1417 ret += absl::Substitute("name: $0\n", name());
1418 }
1419 if (has_linkage_name()) {
1420 ret += absl::Substitute("linkage_name: $0\n", linkage_name());
1421 }
1422 if (has_location_string()) {
1423 ret += absl::Substitute("location_string: $0\n", location_string());
1424 }
1425 if (has_low_pc()) {
1426 ret += absl::Substitute("low_pc: $0\n", low_pc());
1427 }
1428 if (has_high_pc()) {
1429 ret += absl::Substitute("high_pc: $0\n", high_pc());
1430 }
1431 if (has_location_uint64()) {
1432 ret += absl::Substitute("location_uint64: $0\n", location_uint64());
1433 }
1434 if (has_stmt_list()) {
1435 ret += absl::Substitute("stmt_list: $0\n", stmt_list());
1436 }
1437 if (has_ranges()) {
1438 ret += absl::Substitute("ranges: $0\n", ranges());
1439 }
1440 if (has_start_scope()) {
1441 ret += absl::Substitute("start_scope: $0\n", start_scope());
1442 }
1443 return ret;
1444 }
1445
name() const1446 string_view name() const { return name_; }
linkage_name() const1447 string_view linkage_name() const { return linkage_name_; }
location_string() const1448 string_view location_string() const { return location_string_; }
low_pc() const1449 uint64_t low_pc() const { return low_pc_; }
high_pc() const1450 uint64_t high_pc() const { return high_pc_; }
location_uint64() const1451 uint64_t location_uint64() const { return location_uint64_; }
stmt_list() const1452 uint64_t stmt_list() const { return stmt_list_; }
ranges() const1453 uint64_t ranges() const { return ranges_; }
start_scope() const1454 uint64_t start_scope() const { return start_scope_; }
1455
set_name(string_view val)1456 void set_name(string_view val) {
1457 has_name_ = true;
1458 name_ = val;
1459 }
set_linkage_name(string_view val)1460 void set_linkage_name(string_view val) {
1461 has_linkage_name_ = true;
1462 location_string_ = val;
1463 }
set_location_string(string_view val)1464 void set_location_string(string_view val) {
1465 has_location_string_ = true;
1466 location_string_ = val;
1467 }
set_low_pc(uint64_t val)1468 void set_low_pc(uint64_t val) {
1469 has_low_pc_ = true;
1470 low_pc_ = val;
1471 }
set_high_pc(uint64_t val)1472 void set_high_pc(uint64_t val) {
1473 has_high_pc_ = true;
1474 high_pc_ = val;
1475 }
set_location_uint64(uint64_t val)1476 void set_location_uint64(uint64_t val) {
1477 has_location_uint64_ = true;
1478 location_uint64_ = val;
1479 }
set_stmt_list(uint64_t val)1480 void set_stmt_list(uint64_t val) {
1481 has_stmt_list_ = true;
1482 stmt_list_ = val;
1483 }
set_ranges(uint64_t val)1484 void set_ranges(uint64_t val) {
1485 has_ranges_ = true;
1486 ranges_ = val;
1487 }
set_start_scope(uint64_t val)1488 void set_start_scope(uint64_t val) {
1489 has_start_scope_ = true;
1490 start_scope_ = val;
1491 }
1492
1493 private:
1494 bool has_name_ = false;
1495 bool has_linkage_name_ = false;
1496 bool has_location_string_ = false;
1497 bool has_low_pc_ = false;
1498 bool has_high_pc_ = false;
1499 bool has_location_uint64_ = false;
1500 bool has_stmt_list_ = false;
1501 bool has_ranges_ = false;
1502 bool has_start_scope_ = false;
1503
1504 string_view name_;
1505 string_view linkage_name_;
1506 string_view location_string_;
1507 uint64_t low_pc_ = 0;
1508 uint64_t high_pc_ = 0;
1509 uint64_t location_uint64_ = 0;
1510 uint64_t stmt_list_ = 0;
1511 uint64_t ranges_ = 0;
1512 uint64_t start_scope_ = 0;
1513 };
1514
// Minimal DIE record holding only the optional stmt_list attribute.
class InlinesDIE {
 public:
  // Records |val| as the stmt_list value and marks the field as present.
  void set_stmt_list(uint64_t val) {
    stmt_list_offset_ = val;
    stmt_list_present_ = true;
  }

  // True once set_stmt_list() has been called.
  bool has_stmt_list() const { return stmt_list_present_; }

  // The recorded value, or 0 if none was ever set.
  uint64_t stmt_list() const { return stmt_list_offset_; }

 private:
  bool stmt_list_present_ = false;
  uint64_t stmt_list_offset_ = 0;
};
1530
AddDIE(const dwarf::File & file,const std::string & name,const GeneralDIE & die,const SymbolTable & symtab,const DualMap & symbol_map,const dwarf::CompilationUnitSizes & sizes,RangeSink * sink)1531 void AddDIE(const dwarf::File& file, const std::string& name,
1532 const GeneralDIE& die, const SymbolTable& symtab,
1533 const DualMap& symbol_map, const dwarf::CompilationUnitSizes& sizes,
1534 RangeSink* sink) {
1535 // Some DIEs mark address ranges with high_pc/low_pc pairs (especially
1536 // functions).
1537 if (die.has_low_pc() && die.has_high_pc() && die.low_pc() != 0) {
1538 uint64_t high_pc = die.high_pc();
1539
1540 // It appears that some compilers make high_pc a size, and others make it an
1541 // address.
1542 if (high_pc >= die.low_pc()) {
1543 high_pc -= die.low_pc();
1544 }
1545 sink->AddVMRangeIgnoreDuplicate("dwarf_pcpair", die.low_pc(), high_pc,
1546 name);
1547 }
1548
1549 // Sometimes a DIE has a linkage_name, which we can look up in the symbol
1550 // table.
1551 if (die.has_linkage_name()) {
1552 auto it = symtab.find(die.linkage_name());
1553 if (it != symtab.end()) {
1554 sink->AddVMRangeIgnoreDuplicate("dwarf_linkagename", it->second.first,
1555 it->second.second, name);
1556 }
1557 }
1558
1559 // Sometimes the DIE has a "location", which gives the location as an address.
1560 // This parses a very small subset of the overall DWARF expression grammar.
1561 if (die.has_location_string()) {
1562 string_view location = die.location_string();
1563 if (location.size() == sizes.address_size() + 1 &&
1564 location[0] == DW_OP_addr) {
1565 location.remove_prefix(1);
1566 uint64_t addr;
1567 // TODO(haberman): endian?
1568 if (sizes.address_size() == 4) {
1569 addr = dwarf::ReadMemcpy<uint32_t>(&location);
1570 } else if (sizes.address_size() == 8) {
1571 addr = dwarf::ReadMemcpy<uint64_t>(&location);
1572 } else {
1573 BLOATY_UNREACHABLE();
1574 }
1575
1576 // Unfortunately the location doesn't include a size, so we look that part
1577 // up in the symbol map.
1578 uint64_t size;
1579 if (symbol_map.vm_map.TryGetSize(addr, &size)) {
1580 sink->AddVMRangeIgnoreDuplicate("dwarf_location", addr, size, name);
1581 } else {
1582 if (verbose_level > 0) {
1583 fprintf(stderr,
1584 "bloaty: warning: couldn't find DWARF location in symbol "
1585 "table, address: %" PRIx64 "\n",
1586 addr);
1587 }
1588 }
1589 }
1590 }
1591
1592 // Sometimes a location is given as an offset into debug_loc.
1593 if (die.has_location_uint64()) {
1594 if (die.location_uint64() < file.debug_loc.size()) {
1595 absl::string_view loc_range = file.debug_loc.substr(die.location_uint64());
1596 loc_range = GetLocationListRange(sizes, loc_range);
1597 sink->AddFileRange("dwarf_locrange", name, loc_range);
1598 } else if (verbose_level > 0) {
1599 fprintf(stderr,
1600 "bloaty: warning: DWARF location out of range, location=%" PRIx64
1601 "\n",
1602 die.location_uint64());
1603 }
1604 }
1605
1606 uint64_t ranges_offset = UINT64_MAX;
1607
1608 // There are two different attributes that sometimes contain an offset into
1609 // debug_ranges.
1610 if (die.has_ranges()) {
1611 ranges_offset = die.ranges();
1612 } else if (die.has_start_scope()) {
1613 ranges_offset = die.start_scope();
1614 }
1615
1616 if (ranges_offset != UINT64_MAX) {
1617 if (ranges_offset < file.debug_ranges.size()) {
1618 absl::string_view ranges_range = file.debug_ranges.substr(ranges_offset);
1619 ranges_range = GetRangeListRange(sizes, ranges_range);
1620 sink->AddFileRange("dwarf_debugrange", name, ranges_range);
1621 } else if (verbose_level > 0) {
1622 fprintf(stderr,
1623 "bloaty: warning: DWARF debug range out of range, "
1624 "ranges_offset=%" PRIx64 "\n",
1625 ranges_offset);
1626 }
1627 }
1628 }
1629
ReadDWARFPubNames(const dwarf::File & file,string_view section,RangeSink * sink)1630 static void ReadDWARFPubNames(const dwarf::File& file, string_view section,
1631 RangeSink* sink) {
1632 dwarf::DIEReader die_reader(file);
1633 dwarf::AttrReader<string_view> attr_reader;
1634 string_view remaining = section;
1635
1636 attr_reader.OnAttribute(
1637 DW_AT_name, [](string_view* s, dwarf::AttrValue data) {
1638 if (data.type() == dwarf::AttrValue::Type::kString) {
1639 *s = data.GetString();
1640 }
1641 });
1642
1643 while (remaining.size() > 0) {
1644 dwarf::CompilationUnitSizes sizes;
1645 string_view full_unit = remaining;
1646 string_view unit = sizes.ReadInitialLength(&remaining);
1647 full_unit =
1648 full_unit.substr(0, unit.size() + (unit.data() - full_unit.data()));
1649 sizes.ReadDWARFVersion(&unit);
1650 uint64_t debug_info_offset = sizes.ReadDWARFOffset(&unit);
1651 bool ok = die_reader.SeekToCompilationUnit(
1652 dwarf::DIEReader::Section::kDebugInfo, debug_info_offset);
1653 if (!ok) {
1654 THROW("Couldn't seek to debug_info section");
1655 }
1656 string_view compileunit_name;
1657 attr_reader.ReadAttributes(&die_reader, &compileunit_name);
1658 if (!compileunit_name.empty()) {
1659 sink->AddFileRange("dwarf_pubnames", compileunit_name, full_unit);
1660 }
1661 }
1662 }
1663
ReadEncodedPointer(uint8_t encoding,bool is_64bit,string_view * data,const char * data_base,RangeSink * sink)1664 uint64_t ReadEncodedPointer(uint8_t encoding, bool is_64bit, string_view* data,
1665 const char* data_base, RangeSink* sink) {
1666 uint64_t value;
1667 const char* ptr = data->data();
1668 uint8_t format = encoding & DW_EH_PE_FORMAT_MASK;
1669
1670 switch (format) {
1671 case DW_EH_PE_omit:
1672 return 0;
1673 case DW_EH_PE_absptr:
1674 if (is_64bit) {
1675 value = dwarf::ReadMemcpy<uint64_t>(data);
1676 } else {
1677 value = dwarf::ReadMemcpy<uint32_t>(data);
1678 }
1679 break;
1680 case DW_EH_PE_uleb128:
1681 value = dwarf::ReadLEB128<uint64_t>(data);
1682 break;
1683 case DW_EH_PE_udata2:
1684 value = dwarf::ReadMemcpy<uint16_t>(data);
1685 break;
1686 case DW_EH_PE_udata4:
1687 value = dwarf::ReadMemcpy<uint32_t>(data);
1688 break;
1689 case DW_EH_PE_udata8:
1690 value = dwarf::ReadMemcpy<uint64_t>(data);
1691 break;
1692 case DW_EH_PE_sleb128:
1693 value = dwarf::ReadLEB128<int64_t>(data);
1694 break;
1695 case DW_EH_PE_sdata2:
1696 value = dwarf::ReadMemcpy<int16_t>(data);
1697 break;
1698 case DW_EH_PE_sdata4:
1699 value = dwarf::ReadMemcpy<int32_t>(data);
1700 break;
1701 case DW_EH_PE_sdata8:
1702 value = dwarf::ReadMemcpy<int64_t>(data);
1703 break;
1704 default:
1705 THROWF("Unexpected eh_frame format value: $0", format);
1706 }
1707
1708 uint8_t application = encoding & DW_EH_PE_APPLICATION_MASK;
1709
1710 switch (application) {
1711 case 0:
1712 break;
1713 case DW_EH_PE_pcrel:
1714 value += sink->TranslateFileToVM(ptr);
1715 break;
1716 case DW_EH_PE_datarel:
1717 if (data_base == nullptr) {
1718 THROW("datarel requested but no data_base provided");
1719 }
1720 value += sink->TranslateFileToVM(data_base);
1721 break;
1722 case DW_EH_PE_textrel:
1723 case DW_EH_PE_funcrel:
1724 case DW_EH_PE_aligned:
1725 THROWF("Unimplemented eh_frame application value: $0", application);
1726 }
1727
1728 if (encoding & DW_EH_PE_indirect) {
1729 string_view location = sink->TranslateVMToFile(value);
1730 if (is_64bit) {
1731 value = dwarf::ReadMemcpy<uint64_t>(&location);
1732 } else {
1733 value = dwarf::ReadMemcpy<uint32_t>(&location);
1734 }
1735 }
1736
1737 return value;
1738 }
1739
1740 // Code to read the .eh_frame section. This is not technically DWARF, but it
1741 // is similar to .debug_frame (which is DWARF) so it's convenient to put it
1742 // here.
1743 //
1744 // The best documentation I can find for this format comes from:
1745 //
1746 // *
1747 // http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
1748 // * https://www.airs.com/blog/archives/460
1749 //
1750 // However these are both under-specified. Some details are not mentioned in
1751 // either of these (for example, the fact that the function length uses the FDE
1752 // encoding, but always absolute). libdwarf's implementation contains a comment
1753 // saying "It is not clear if this is entirely correct". Basically the only
1754 // thing you can trust for some of these details is the code that actually
1755 // implements unwinding in production:
1756 //
1757 // * libunwind http://www.nongnu.org/libunwind/
1758 // https://github.com/pathscale/libunwind/blob/master/src/dwarf/Gfde.c
1759 // * LLVM libunwind (a different project!!)
1760 // https://github.com/llvm-mirror/libunwind/blob/master/src/DwarfParser.hpp
1761 // * libgcc
1762 // https://github.com/gcc-mirror/gcc/blob/master/libgcc/unwind-dw2-fde.c
ReadEhFrame(string_view data,RangeSink * sink)1763 void ReadEhFrame(string_view data, RangeSink* sink) {
1764 string_view remaining = data;
1765
1766 struct CIEInfo {
1767 int version = 0;
1768 uint32_t code_align = 0;
1769 int32_t data_align = 0;
1770 uint8_t fde_encoding = 0;
1771 uint8_t lsda_encoding = 0;
1772 bool is_signal_handler = false;
1773 bool has_augmentation_length = false;
1774 uint64_t personality_function = 0;
1775 uint32_t return_address_reg = 0;
1776 };
1777
1778 std::unordered_map<const void*, CIEInfo> cie_map;
1779
1780 while (remaining.size() > 0) {
1781 dwarf::CompilationUnitSizes sizes;
1782 string_view full_entry = remaining;
1783 string_view entry = sizes.ReadInitialLength(&remaining);
1784 if (entry.size() == 0 && remaining.size() == 0) {
1785 return;
1786 }
1787 full_entry =
1788 full_entry.substr(0, entry.size() + (entry.data() - full_entry.data()));
1789 uint32_t id = dwarf::ReadMemcpy<uint32_t>(&entry);
1790 if (id == 0) {
1791 // CIE, we don't attribute this yet.
1792 CIEInfo& cie_info = cie_map[full_entry.data()];
1793 cie_info.version = dwarf::ReadMemcpy<uint8_t>(&entry);
1794 string_view aug_string = dwarf::ReadNullTerminated(&entry);
1795 cie_info.code_align = dwarf::ReadLEB128<uint32_t>(&entry);
1796 cie_info.data_align = dwarf::ReadLEB128<int32_t>(&entry);
1797 switch (cie_info.version) {
1798 case 1:
1799 cie_info.return_address_reg = dwarf::ReadMemcpy<uint8_t>(&entry);
1800 break;
1801 case 3:
1802 cie_info.return_address_reg = dwarf::ReadLEB128<uint32_t>(&entry);
1803 break;
1804 default:
1805 THROW("Unexpected eh_frame CIE version");
1806 }
1807 while (aug_string.size() > 0) {
1808 switch (aug_string[0]) {
1809 case 'z':
1810 // Length until the end of augmentation data.
1811 cie_info.has_augmentation_length = true;
1812 dwarf::ReadLEB128<uint32_t>(&entry);
1813 break;
1814 case 'L':
1815 cie_info.lsda_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
1816 break;
1817 case 'R':
1818 cie_info.fde_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
1819 break;
1820 case 'S':
1821 cie_info.is_signal_handler = true;
1822 break;
1823 case 'P': {
1824 uint8_t encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
1825 cie_info.personality_function =
1826 ReadEncodedPointer(encoding, true, &entry, nullptr, sink);
1827 break;
1828 }
1829 default:
1830 THROW("Unexepcted augmentation character");
1831 }
1832 aug_string.remove_prefix(1);
1833 }
1834 } else {
1835 auto iter = cie_map.find(entry.data() - id - 4);
1836 if (iter == cie_map.end()) {
1837 THROW("Couldn't find CIE for FDE");
1838 }
1839 const CIEInfo& cie_info = iter->second;
1840 // TODO(haberman): don't hard-code 64-bit.
1841 uint64_t address = ReadEncodedPointer(cie_info.fde_encoding, true, &entry,
1842 nullptr, sink);
1843 // TODO(haberman); Technically the FDE addresses could span a
1844 // function/compilation unit? They can certainly span inlines.
1845 /*
1846 uint64_t length =
1847 ReadEncodedPointer(cie_info.fde_encoding & 0xf, true, &entry, sink);
1848 (void)length;
1849
1850 if (cie_info.has_augmentation_length) {
1851 uint32_t augmentation_length = dwarf::ReadLEB128<uint32_t>(&entry);
1852 (void)augmentation_length;
1853 }
1854
1855 uint64_t lsda =
1856 ReadEncodedPointer(cie_info.lsda_encoding, true, &entry, sink);
1857 if (lsda) {
1858 }
1859 */
1860
1861 sink->AddFileRangeForVMAddr("dwarf_fde", address, full_entry);
1862 }
1863 }
1864 }
1865
1866 // See documentation here:
1867 // http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html#EHFRAME
ReadEhFrameHdr(string_view data,RangeSink * sink)1868 void ReadEhFrameHdr(string_view data, RangeSink* sink) {
1869 const char* base = data.data();
1870 uint8_t version = dwarf::ReadMemcpy<uint8_t>(&data);
1871 uint8_t eh_frame_ptr_enc = dwarf::ReadMemcpy<uint8_t>(&data);
1872 uint8_t fde_count_enc = dwarf::ReadMemcpy<uint8_t>(&data);
1873 uint8_t table_enc = dwarf::ReadMemcpy<uint8_t>(&data);
1874
1875 if (version != 1) {
1876 THROWF("Unknown eh_frame_hdr version: $0", version);
1877 }
1878
1879 // TODO(haberman): don't hard-code 64-bit.
1880 uint64_t eh_frame_ptr =
1881 ReadEncodedPointer(eh_frame_ptr_enc, true, &data, base, sink);
1882 (void)eh_frame_ptr;
1883 uint64_t fde_count =
1884 ReadEncodedPointer(fde_count_enc, true, &data, base, sink);
1885
1886 for (uint64_t i = 0; i < fde_count; i++) {
1887 string_view entry_data = data;
1888 uint64_t initial_location =
1889 ReadEncodedPointer(table_enc, true, &data, base, sink);
1890 uint64_t fde_addr = ReadEncodedPointer(table_enc, true, &data, base, sink);
1891 entry_data.remove_suffix(data.size());
1892 sink->AddFileRangeForVMAddr("dwarf_fde_table", initial_location,
1893 entry_data);
1894
1895 // We could add fde_addr with an unknown length if we wanted to skip reading
1896 // eh_frame. We can't count on this table being available though, so we
1897 // don't want to remove the eh_frame reading code altogether.
1898 (void)fde_addr;
1899 }
1900 }
1901
ReadDWARFStmtListRange(const dwarf::File & file,uint64_t offset,string_view unit_name,RangeSink * sink)1902 static void ReadDWARFStmtListRange(const dwarf::File& file, uint64_t offset,
1903 string_view unit_name, RangeSink* sink) {
1904 string_view data = file.debug_line;
1905 dwarf::SkipBytes(offset, &data);
1906 string_view data_with_length = data;
1907 dwarf::CompilationUnitSizes sizes;
1908 data = sizes.ReadInitialLength(&data);
1909 data = data_with_length.substr(
1910 0, data.size() + (data.data() - data_with_length.data()));
1911 sink->AddFileRange("dwarf_stmtlistrange", unit_name, data);
1912 }
1913
1914 // The DWARF debug info can help us get compileunits info. DIEs for compilation
1915 // units, functions, and global variables often have attributes that will
1916 // resolve to addresses.
ReadDWARFDebugInfo(const dwarf::File & file,dwarf::DIEReader::Section section,const SymbolTable & symtab,const DualMap & symbol_map,RangeSink * sink,std::unordered_map<uint64_t,std::string> * stmt_list_map)1917 static void ReadDWARFDebugInfo(
1918 const dwarf::File& file, dwarf::DIEReader::Section section,
1919 const SymbolTable& symtab, const DualMap& symbol_map, RangeSink* sink,
1920 std::unordered_map<uint64_t, std::string>* stmt_list_map) {
1921 dwarf::DIEReader die_reader(file);
1922 die_reader.set_strp_sink(sink);
1923 dwarf::AttrReader<GeneralDIE> attr_reader;
1924
1925 attr_reader.OnAttribute(DW_AT_name,
1926 [](GeneralDIE* die, dwarf::AttrValue val) {
1927 if (!val.IsString()) return;
1928 die->set_name(val.GetString());
1929 });
1930 attr_reader.OnAttribute(DW_AT_linkage_name,
1931 [](GeneralDIE* die, dwarf::AttrValue val) {
1932 if (!val.IsString()) return;
1933 die->set_linkage_name(val.GetString());
1934 });
1935 attr_reader.OnAttribute(DW_AT_location,
1936 [](GeneralDIE* die, dwarf::AttrValue val) {
1937 if (val.IsString()) {
1938 die->set_location_string(val.GetString());
1939 } else {
1940 die->set_location_uint64(val.GetUint());
1941 }
1942 });
1943 attr_reader.OnAttribute(DW_AT_low_pc,
1944 [](GeneralDIE* die, dwarf::AttrValue val) {
1945 absl::optional<uint64_t> uint = val.ToUint();
1946 if (!uint.has_value()) return;
1947 die->set_low_pc(uint.value());
1948 });
1949 attr_reader.OnAttribute(DW_AT_high_pc,
1950 [](GeneralDIE* die, dwarf::AttrValue val) {
1951 absl::optional<uint64_t> uint = val.ToUint();
1952 if (!uint.has_value()) return;
1953 die->set_high_pc(uint.value());
1954 });
1955 attr_reader.OnAttribute(DW_AT_stmt_list,
1956 [](GeneralDIE* die, dwarf::AttrValue val) {
1957 absl::optional<uint64_t> uint = val.ToUint();
1958 if (!uint.has_value()) return;
1959 die->set_stmt_list(uint.value());
1960 });
1961 attr_reader.OnAttribute(DW_AT_ranges,
1962 [](GeneralDIE* die, dwarf::AttrValue val) {
1963 absl::optional<uint64_t> uint = val.ToUint();
1964 if (!uint.has_value()) return;
1965 die->set_ranges(uint.value());
1966 });
1967 attr_reader.OnAttribute(DW_AT_start_scope,
1968 [](GeneralDIE* die, dwarf::AttrValue val) {
1969 absl::optional<uint64_t> uint = val.ToUint();
1970 if (!uint.has_value()) return;
1971 die->set_start_scope(uint.value());
1972 });
1973
1974 if (!die_reader.SeekToStart(section)) {
1975 return;
1976 }
1977
1978 do {
1979 GeneralDIE compileunit_die;
1980 attr_reader.ReadAttributes(&die_reader, &compileunit_die);
1981 std::string compileunit_name = std::string(compileunit_die.name());
1982
1983 if (compileunit_die.has_stmt_list()) {
1984 uint64_t stmt_list = compileunit_die.stmt_list();
1985 if (compileunit_name.empty()) {
1986 auto iter = stmt_list_map->find(stmt_list);
1987 if (iter != stmt_list_map->end()) {
1988 compileunit_name = iter->second;
1989 }
1990 } else {
1991 (*stmt_list_map)[stmt_list] = compileunit_name;
1992 }
1993 }
1994
1995 if (compileunit_name.empty()) {
1996 continue;
1997 }
1998
1999 die_reader.set_compileunit_name(compileunit_name);
2000 sink->AddFileRange("dwarf_debuginfo", compileunit_name,
2001 die_reader.unit_range());
2002 AddDIE(file, compileunit_name, compileunit_die, symtab, symbol_map,
2003 die_reader.unit_sizes(), sink);
2004
2005 if (compileunit_die.has_stmt_list()) {
2006 uint64_t offset = compileunit_die.stmt_list();
2007 ReadDWARFStmtListRange(file, offset, compileunit_name, sink);
2008 }
2009
2010 string_view abbrev_data = file.debug_abbrev;
2011 dwarf::SkipBytes(die_reader.debug_abbrev_offset(), &abbrev_data);
2012 dwarf::AbbrevTable unit_abbrev;
2013 abbrev_data = unit_abbrev.ReadAbbrevs(abbrev_data);
2014 sink->AddFileRange("dwarf_abbrev", compileunit_name, abbrev_data);
2015
2016 while (die_reader.NextDIE()) {
2017 GeneralDIE die;
2018 attr_reader.ReadAttributes(&die_reader, &die);
2019
2020 // low_pc == 0 is a signal that this routine was stripped out of the
2021 // final binary. Skip this DIE and all of its children.
2022 if (die.has_low_pc() && die.low_pc() == 0) {
2023 die_reader.SkipChildren();
2024 } else {
2025 AddDIE(file, compileunit_name, die, symtab, symbol_map,
2026 die_reader.unit_sizes(), sink);
2027 }
2028 }
2029 } while (die_reader.NextCompilationUnit());
2030 }
2031
ReadDWARFCompileUnits(const dwarf::File & file,const SymbolTable & symtab,const DualMap & symbol_map,RangeSink * sink)2032 void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
2033 const DualMap& symbol_map, RangeSink* sink) {
2034 if (!file.debug_info.size()) {
2035 THROW("missing debug info");
2036 }
2037
2038 if (file.debug_aranges.size()) {
2039 ReadDWARFAddressRanges(file, sink);
2040 }
2041
2042 std::unordered_map<uint64_t, std::string> stmt_list_map;
2043 ReadDWARFDebugInfo(file, dwarf::DIEReader::Section::kDebugInfo, symtab,
2044 symbol_map, sink, &stmt_list_map);
2045 ReadDWARFDebugInfo(file, dwarf::DIEReader::Section::kDebugTypes, symtab,
2046 symbol_map, sink, &stmt_list_map);
2047 ReadDWARFPubNames(file, file.debug_pubnames, sink);
2048 ReadDWARFPubNames(file, file.debug_pubtypes, sink);
2049 }
2050
// Builds the label used for a line-table row: "<file>:<line>" when
// |include_line| is set, otherwise just "<file>".
static std::string LineInfoKey(const std::string& file, uint32_t line,
                               bool include_line) {
  if (!include_line) {
    return file;
  }

  std::string key(file);
  key += ":";
  key += std::to_string(line);
  return key;
}
2059
ReadDWARFStmtList(bool include_line,dwarf::LineInfoReader * line_info_reader,RangeSink * sink)2060 static void ReadDWARFStmtList(bool include_line,
2061 dwarf::LineInfoReader* line_info_reader,
2062 RangeSink* sink) {
2063 uint64_t span_startaddr = 0;
2064 std::string last_source;
2065
2066 while (line_info_reader->ReadLineInfo()) {
2067 const auto& line_info = line_info_reader->lineinfo();
2068 auto addr = line_info.address;
2069 auto number = line_info.line;
2070 auto name =
2071 line_info.end_sequence
2072 ? last_source
2073 : LineInfoKey(line_info_reader->GetExpandedFilename(line_info.file),
2074 number, include_line);
2075 if (!span_startaddr) {
2076 span_startaddr = addr;
2077 } else if (line_info.end_sequence ||
2078 (!last_source.empty() && name != last_source)) {
2079 sink->AddVMRange("dwarf_stmtlist", span_startaddr, addr - span_startaddr,
2080 last_source);
2081 if (line_info.end_sequence) {
2082 span_startaddr = 0;
2083 } else {
2084 span_startaddr = addr;
2085 }
2086 }
2087 last_source = name;
2088 }
2089 }
2090
ReadDWARFInlines(const dwarf::File & file,RangeSink * sink,bool include_line)2091 void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
2092 bool include_line) {
2093 if (!file.debug_info.size() || !file.debug_line.size()) {
2094 THROW("no debug info");
2095 }
2096
2097 dwarf::DIEReader die_reader(file);
2098 dwarf::LineInfoReader line_info_reader(file);
2099 dwarf::AttrReader<InlinesDIE> attr_reader;
2100
2101 attr_reader.OnAttribute(
2102 DW_AT_stmt_list, [](InlinesDIE* die, dwarf::AttrValue data) {
2103 absl::optional<uint64_t> uint = data.ToUint();
2104 if (!uint.has_value()) return;
2105 die->set_stmt_list(uint.value());
2106 });
2107
2108 if (!die_reader.SeekToStart(dwarf::DIEReader::Section::kDebugInfo)) {
2109 THROW("debug info is present, but empty");
2110 }
2111
2112 while (true) {
2113 InlinesDIE die;
2114 attr_reader.ReadAttributes(&die_reader, &die);
2115
2116 if (die.has_stmt_list()) {
2117 uint64_t offset = die.stmt_list();
2118 line_info_reader.SeekToOffset(offset,
2119 die_reader.unit_sizes().address_size());
2120 ReadDWARFStmtList(include_line, &line_info_reader, sink);
2121 }
2122
2123 if (!die_reader.NextCompilationUnit()) {
2124 return;
2125 }
2126 }
2127 }
2128
2129 } // namespace bloaty
2130