1 //===- DWARFAcceleratorTable.h ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFACCELERATORTABLE_H
10 #define LLVM_DEBUGINFO_DWARF_DWARFACCELERATORTABLE_H
11 
12 #include "llvm/ADT/DenseSet.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/BinaryFormat/Dwarf.h"
15 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include <cstdint>
18 #include <utility>
19 
20 namespace llvm {
21 
22 class raw_ostream;
23 class ScopedPrinter;
24 
25 /// The accelerator tables are designed to allow efficient random access
26 /// (using a symbol name as a key) into debug info by providing an index of the
27 /// debug info DIEs. This class implements the common functionality of Apple and
28 /// DWARF 5 accelerator tables.
29 /// TODO: Generalize the rest of the AppleAcceleratorTable interface and move it
30 /// to this class.
31 class DWARFAcceleratorTable {
32 protected:
33   DWARFDataExtractor AccelSection;
34   DataExtractor StringSection;
35 
36 public:
37   /// An abstract class representing a single entry in the accelerator tables.
38   class Entry {
39   protected:
40     SmallVector<DWARFFormValue, 3> Values;
41 
42     Entry() = default;
43 
44     // Make these protected so only (final) subclasses can be copied around.
45     Entry(const Entry &) = default;
46     Entry(Entry &&) = default;
47     Entry &operator=(const Entry &) = default;
48     Entry &operator=(Entry &&) = default;
49     ~Entry() = default;
50 
51 
52   public:
53     /// Returns the Offset of the Compilation Unit associated with this
54     /// Accelerator Entry or None if the Compilation Unit offset is not recorded
55     /// in this Accelerator Entry.
56     virtual Optional<uint64_t> getCUOffset() const = 0;
57 
58     /// Returns the Tag of the Debug Info Entry associated with this
59     /// Accelerator Entry or None if the Tag is not recorded in this
60     /// Accelerator Entry.
61     virtual Optional<dwarf::Tag> getTag() const = 0;
62 
63     /// Returns the raw values of fields in the Accelerator Entry. In general,
64     /// these can only be interpreted with the help of the metadata in the
65     /// owning Accelerator Table.
66     ArrayRef<DWARFFormValue> getValues() const { return Values; }
67   };
68 
69   DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection,
70                         DataExtractor StringSection)
71       : AccelSection(AccelSection), StringSection(StringSection) {}
72   virtual ~DWARFAcceleratorTable();
73 
74   virtual Error extract() = 0;
75   virtual void dump(raw_ostream &OS) const = 0;
76 
77   DWARFAcceleratorTable(const DWARFAcceleratorTable &) = delete;
78   void operator=(const DWARFAcceleratorTable &) = delete;
79 };
80 
81 /// This implements the Apple accelerator table format, a precursor of the
82 /// DWARF 5 accelerator table format.
83 class AppleAcceleratorTable : public DWARFAcceleratorTable {
84   struct Header {
85     uint32_t Magic;
86     uint16_t Version;
87     uint16_t HashFunction;
88     uint32_t BucketCount;
89     uint32_t HashCount;
90     uint32_t HeaderDataLength;
91 
92     void dump(ScopedPrinter &W) const;
93   };
94 
95   struct HeaderData {
96     using AtomType = uint16_t;
97     using Form = dwarf::Form;
98 
99     uint64_t DIEOffsetBase;
100     SmallVector<std::pair<AtomType, Form>, 3> Atoms;
101 
102     Optional<uint64_t> extractOffset(Optional<DWARFFormValue> Value) const;
103   };
104 
105   struct Header Hdr;
106   struct HeaderData HdrData;
107   bool IsValid = false;
108 
109   /// Returns true if we should continue scanning for entries or false if we've
110   /// reached the last (sentinel) entry of encountered a parsing error.
111   bool dumpName(ScopedPrinter &W, SmallVectorImpl<DWARFFormValue> &AtomForms,
112                 uint64_t *DataOffset) const;
113 
114 public:
115   /// Apple-specific implementation of an Accelerator Entry.
116   class Entry final : public DWARFAcceleratorTable::Entry {
117     const HeaderData *HdrData = nullptr;
118 
119     Entry(const HeaderData &Data);
120     Entry() = default;
121 
122     void extract(const AppleAcceleratorTable &AccelTable, uint64_t *Offset);
123 
124   public:
125     Optional<uint64_t> getCUOffset() const override;
126 
127     /// Returns the Section Offset of the Debug Info Entry associated with this
128     /// Accelerator Entry or None if the DIE offset is not recorded in this
129     /// Accelerator Entry. The returned offset is relative to the start of the
130     /// Section containing the DIE.
131     Optional<uint64_t> getDIESectionOffset() const;
132 
133     Optional<dwarf::Tag> getTag() const override;
134 
135     /// Returns the value of the Atom in this Accelerator Entry, if the Entry
136     /// contains such Atom.
137     Optional<DWARFFormValue> lookup(HeaderData::AtomType Atom) const;
138 
139     friend class AppleAcceleratorTable;
140     friend class ValueIterator;
141   };
142 
143   class ValueIterator {
144     const AppleAcceleratorTable *AccelTable = nullptr;
145     Entry Current;           ///< The current entry.
146     uint64_t DataOffset = 0; ///< Offset into the section.
147     unsigned Data = 0; ///< Current data entry.
148     unsigned NumData = 0; ///< Number of data entries.
149 
150     /// Advance the iterator.
151     void Next();
152 
153   public:
154     using iterator_category = std::input_iterator_tag;
155     using value_type = Entry;
156     using difference_type = std::ptrdiff_t;
157     using pointer = value_type *;
158     using reference = value_type &;
159 
160     /// Construct a new iterator for the entries at \p DataOffset.
161     ValueIterator(const AppleAcceleratorTable &AccelTable, uint64_t DataOffset);
162     /// End marker.
163     ValueIterator() = default;
164 
165     const Entry &operator*() const { return Current; }
166     ValueIterator &operator++() { Next(); return *this; }
167     ValueIterator operator++(int) {
168       ValueIterator I = *this;
169       Next();
170       return I;
171     }
172     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
173       return A.NumData == B.NumData && A.DataOffset == B.DataOffset;
174     }
175     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
176       return !(A == B);
177     }
178   };
179 
180   AppleAcceleratorTable(const DWARFDataExtractor &AccelSection,
181                         DataExtractor StringSection)
182       : DWARFAcceleratorTable(AccelSection, StringSection) {}
183 
184   Error extract() override;
185   uint32_t getNumBuckets();
186   uint32_t getNumHashes();
187   uint32_t getSizeHdr();
188   uint32_t getHeaderDataLength();
189 
190   /// Return the Atom description, which can be used to interpret the raw values
191   /// of the Accelerator Entries in this table.
192   ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc();
193   bool validateForms();
194 
195   /// Return information related to the DWARF DIE we're looking for when
196   /// performing a lookup by name.
197   ///
198   /// \param HashDataOffset an offset into the hash data table
199   /// \returns <DieOffset, DieTag>
200   /// DieOffset is the offset into the .debug_info section for the DIE
201   /// related to the input hash data offset.
202   /// DieTag is the tag of the DIE
203   std::pair<uint64_t, dwarf::Tag> readAtoms(uint64_t *HashDataOffset);
204   void dump(raw_ostream &OS) const override;
205 
206   /// Look up all entries in the accelerator table matching \c Key.
207   iterator_range<ValueIterator> equal_range(StringRef Key) const;
208 };
209 
210 /// .debug_names section consists of one or more units. Each unit starts with a
211 /// header, which is followed by a list of compilation units, local and foreign
212 /// type units.
213 ///
214 /// These may be followed by an (optional) hash lookup table, which consists of
215 /// an array of buckets and hashes similar to the apple tables above. The only
216 /// difference is that the hashes array is 1-based, and consequently an empty
217 /// bucket is denoted by 0 and not UINT32_MAX.
218 ///
219 /// Next is the name table, which consists of an array of names and array of
220 /// entry offsets. This is different from the apple tables, which store names
221 /// next to the actual entries.
222 ///
223 /// The structure of the entries is described by an abbreviations table, which
224 /// comes after the name table. Unlike the apple tables, which have a uniform
225 /// entry structure described in the header, each .debug_names entry may have
226 /// different index attributes (DW_IDX_???) attached to it.
227 ///
228 /// The last segment consists of a list of entries, which is a 0-terminated list
229 /// referenced by the name table and interpreted with the help of the
230 /// abbreviation table.
231 class DWARFDebugNames : public DWARFAcceleratorTable {
232 public:
233   class NameIndex;
234   class NameIterator;
235   class ValueIterator;
236 
237   /// DWARF v5 Name Index header.
238   struct Header {
239     uint64_t UnitLength;
240     dwarf::DwarfFormat Format;
241     uint16_t Version;
242     uint32_t CompUnitCount;
243     uint32_t LocalTypeUnitCount;
244     uint32_t ForeignTypeUnitCount;
245     uint32_t BucketCount;
246     uint32_t NameCount;
247     uint32_t AbbrevTableSize;
248     uint32_t AugmentationStringSize;
249     SmallString<8> AugmentationString;
250 
251     Error extract(const DWARFDataExtractor &AS, uint64_t *Offset);
252     void dump(ScopedPrinter &W) const;
253   };
254 
255   /// Index attribute and its encoding.
256   struct AttributeEncoding {
257     dwarf::Index Index;
258     dwarf::Form Form;
259 
260     constexpr AttributeEncoding(dwarf::Index Index, dwarf::Form Form)
261         : Index(Index), Form(Form) {}
262 
263     friend bool operator==(const AttributeEncoding &LHS,
264                            const AttributeEncoding &RHS) {
265       return LHS.Index == RHS.Index && LHS.Form == RHS.Form;
266     }
267   };
268 
269   /// Abbreviation describing the encoding of Name Index entries.
270   struct Abbrev {
271     uint32_t Code;  ///< Abbreviation code
272     dwarf::Tag Tag; ///< Dwarf Tag of the described entity.
273     std::vector<AttributeEncoding> Attributes; ///< List of index attributes.
274 
275     Abbrev(uint32_t Code, dwarf::Tag Tag,
276            std::vector<AttributeEncoding> Attributes)
277         : Code(Code), Tag(Tag), Attributes(std::move(Attributes)) {}
278 
279     void dump(ScopedPrinter &W) const;
280   };
281 
282   /// DWARF v5-specific implementation of an Accelerator Entry.
283   class Entry final : public DWARFAcceleratorTable::Entry {
284     const NameIndex *NameIdx;
285     const Abbrev *Abbr;
286 
287     Entry(const NameIndex &NameIdx, const Abbrev &Abbr);
288 
289   public:
290     Optional<uint64_t> getCUOffset() const override;
291     Optional<dwarf::Tag> getTag() const override { return tag(); }
292 
293     /// Returns the Index into the Compilation Unit list of the owning Name
294     /// Index or None if this Accelerator Entry does not have an associated
295     /// Compilation Unit. It is up to the user to verify that the returned Index
296     /// is valid in the owning NameIndex (or use getCUOffset(), which will
297     /// handle that check itself). Note that entries in NameIndexes which index
298     /// just a single Compilation Unit are implicitly associated with that unit,
299     /// so this function will return 0 even without an explicit
300     /// DW_IDX_compile_unit attribute.
301     Optional<uint64_t> getCUIndex() const;
302 
303     /// .debug_names-specific getter, which always succeeds (DWARF v5 index
304     /// entries always have a tag).
305     dwarf::Tag tag() const { return Abbr->Tag; }
306 
307     /// Returns the Offset of the DIE within the containing CU or TU.
308     Optional<uint64_t> getDIEUnitOffset() const;
309 
310     /// Return the Abbreviation that can be used to interpret the raw values of
311     /// this Accelerator Entry.
312     const Abbrev &getAbbrev() const { return *Abbr; }
313 
314     /// Returns the value of the Index Attribute in this Accelerator Entry, if
315     /// the Entry contains such Attribute.
316     Optional<DWARFFormValue> lookup(dwarf::Index Index) const;
317 
318     void dump(ScopedPrinter &W) const;
319 
320     friend class NameIndex;
321     friend class ValueIterator;
322   };
323 
324   /// Error returned by NameIndex::getEntry to report it has reached the end of
325   /// the entry list.
326   class SentinelError : public ErrorInfo<SentinelError> {
327   public:
328     static char ID;
329 
330     void log(raw_ostream &OS) const override { OS << "Sentinel"; }
331     std::error_code convertToErrorCode() const override;
332   };
333 
334 private:
335   /// DenseMapInfo for struct Abbrev.
336   struct AbbrevMapInfo {
337     static Abbrev getEmptyKey();
338     static Abbrev getTombstoneKey();
339     static unsigned getHashValue(uint32_t Code) {
340       return DenseMapInfo<uint32_t>::getHashValue(Code);
341     }
342     static unsigned getHashValue(const Abbrev &Abbr) {
343       return getHashValue(Abbr.Code);
344     }
345     static bool isEqual(uint32_t LHS, const Abbrev &RHS) {
346       return LHS == RHS.Code;
347     }
348     static bool isEqual(const Abbrev &LHS, const Abbrev &RHS) {
349       return LHS.Code == RHS.Code;
350     }
351   };
352 
353 public:
354   /// A single entry in the Name Table (DWARF v5 sect. 6.1.1.4.6) of the Name
355   /// Index.
356   class NameTableEntry {
357     DataExtractor StrData;
358 
359     uint32_t Index;
360     uint64_t StringOffset;
361     uint64_t EntryOffset;
362 
363   public:
364     NameTableEntry(const DataExtractor &StrData, uint32_t Index,
365                    uint64_t StringOffset, uint64_t EntryOffset)
366         : StrData(StrData), Index(Index), StringOffset(StringOffset),
367           EntryOffset(EntryOffset) {}
368 
369     /// Return the index of this name in the parent Name Index.
370     uint32_t getIndex() const { return Index; }
371 
372     /// Returns the offset of the name of the described entities.
373     uint64_t getStringOffset() const { return StringOffset; }
374 
375     /// Return the string referenced by this name table entry or nullptr if the
376     /// string offset is not valid.
377     const char *getString() const {
378       uint64_t Off = StringOffset;
379       return StrData.getCStr(&Off);
380     }
381 
382     /// Returns the offset of the first Entry in the list.
383     uint64_t getEntryOffset() const { return EntryOffset; }
384   };
385 
386   /// Represents a single accelerator table within the DWARF v5 .debug_names
387   /// section.
388   class NameIndex {
389     DenseSet<Abbrev, AbbrevMapInfo> Abbrevs;
390     struct Header Hdr;
391     const DWARFDebugNames &Section;
392 
393     // Base of the whole unit and of various important tables, as offsets from
394     // the start of the section.
395     uint64_t Base;
396     uint64_t CUsBase;
397     uint64_t BucketsBase;
398     uint64_t HashesBase;
399     uint64_t StringOffsetsBase;
400     uint64_t EntryOffsetsBase;
401     uint64_t EntriesBase;
402 
403     void dumpCUs(ScopedPrinter &W) const;
404     void dumpLocalTUs(ScopedPrinter &W) const;
405     void dumpForeignTUs(ScopedPrinter &W) const;
406     void dumpAbbreviations(ScopedPrinter &W) const;
407     bool dumpEntry(ScopedPrinter &W, uint64_t *Offset) const;
408     void dumpName(ScopedPrinter &W, const NameTableEntry &NTE,
409                   Optional<uint32_t> Hash) const;
410     void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const;
411 
412     Expected<AttributeEncoding> extractAttributeEncoding(uint64_t *Offset);
413 
414     Expected<std::vector<AttributeEncoding>>
415     extractAttributeEncodings(uint64_t *Offset);
416 
417     Expected<Abbrev> extractAbbrev(uint64_t *Offset);
418 
419   public:
420     NameIndex(const DWARFDebugNames &Section, uint64_t Base)
421         : Section(Section), Base(Base) {}
422 
423     /// Reads offset of compilation unit CU. CU is 0-based.
424     uint64_t getCUOffset(uint32_t CU) const;
425     uint32_t getCUCount() const { return Hdr.CompUnitCount; }
426 
427     /// Reads offset of local type unit TU, TU is 0-based.
428     uint64_t getLocalTUOffset(uint32_t TU) const;
429     uint32_t getLocalTUCount() const { return Hdr.LocalTypeUnitCount; }
430 
431     /// Reads signature of foreign type unit TU. TU is 0-based.
432     uint64_t getForeignTUSignature(uint32_t TU) const;
433     uint32_t getForeignTUCount() const { return Hdr.ForeignTypeUnitCount; }
434 
435     /// Reads an entry in the Bucket Array for the given Bucket. The returned
436     /// value is a (1-based) index into the Names, StringOffsets and
437     /// EntryOffsets arrays. The input Bucket index is 0-based.
438     uint32_t getBucketArrayEntry(uint32_t Bucket) const;
439     uint32_t getBucketCount() const { return Hdr.BucketCount; }
440 
441     /// Reads an entry in the Hash Array for the given Index. The input Index
442     /// is 1-based.
443     uint32_t getHashArrayEntry(uint32_t Index) const;
444 
445     /// Reads an entry in the Name Table for the given Index. The Name Table
446     /// consists of two arrays -- String Offsets and Entry Offsets. The returned
447     /// offsets are relative to the starts of respective sections. Input Index
448     /// is 1-based.
449     NameTableEntry getNameTableEntry(uint32_t Index) const;
450 
451     uint32_t getNameCount() const { return Hdr.NameCount; }
452 
453     const DenseSet<Abbrev, AbbrevMapInfo> &getAbbrevs() const {
454       return Abbrevs;
455     }
456 
457     Expected<Entry> getEntry(uint64_t *Offset) const;
458 
459     /// Look up all entries in this Name Index matching \c Key.
460     iterator_range<ValueIterator> equal_range(StringRef Key) const;
461 
462     NameIterator begin() const { return NameIterator(this, 1); }
463     NameIterator end() const { return NameIterator(this, getNameCount() + 1); }
464 
465     Error extract();
466     uint64_t getUnitOffset() const { return Base; }
467     uint64_t getNextUnitOffset() const {
468       return Base + dwarf::getUnitLengthFieldByteSize(Hdr.Format) +
469              Hdr.UnitLength;
470     }
471     void dump(ScopedPrinter &W) const;
472 
473     friend class DWARFDebugNames;
474   };
475 
476   class ValueIterator {
477   public:
478     using iterator_category = std::input_iterator_tag;
479     using value_type = Entry;
480     using difference_type = std::ptrdiff_t;
481     using pointer = value_type *;
482     using reference = value_type &;
483 
484   private:
485     /// The Name Index we are currently iterating through. The implementation
486     /// relies on the fact that this can also be used as an iterator into the
487     /// "NameIndices" vector in the Accelerator section.
488     const NameIndex *CurrentIndex = nullptr;
489 
490     /// Whether this is a local iterator (searches in CurrentIndex only) or not
491     /// (searches all name indices).
492     bool IsLocal;
493 
494     Optional<Entry> CurrentEntry;
495     uint64_t DataOffset = 0; ///< Offset into the section.
496     std::string Key;         ///< The Key we are searching for.
497     Optional<uint32_t> Hash; ///< Hash of Key, if it has been computed.
498 
499     bool getEntryAtCurrentOffset();
500     Optional<uint64_t> findEntryOffsetInCurrentIndex();
501     bool findInCurrentIndex();
502     void searchFromStartOfCurrentIndex();
503     void next();
504 
505     /// Set the iterator to the "end" state.
506     void setEnd() { *this = ValueIterator(); }
507 
508   public:
509     /// Create a "begin" iterator for looping over all entries in the
510     /// accelerator table matching Key. The iterator will run through all Name
511     /// Indexes in the section in sequence.
512     ValueIterator(const DWARFDebugNames &AccelTable, StringRef Key);
513 
514     /// Create a "begin" iterator for looping over all entries in a specific
515     /// Name Index. Other indices in the section will not be visited.
516     ValueIterator(const NameIndex &NI, StringRef Key);
517 
518     /// End marker.
519     ValueIterator() = default;
520 
521     const Entry &operator*() const { return *CurrentEntry; }
522     ValueIterator &operator++() {
523       next();
524       return *this;
525     }
526     ValueIterator operator++(int) {
527       ValueIterator I = *this;
528       next();
529       return I;
530     }
531 
532     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
533       return A.CurrentIndex == B.CurrentIndex && A.DataOffset == B.DataOffset;
534     }
535     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
536       return !(A == B);
537     }
538   };
539 
540   class NameIterator {
541 
542     /// The Name Index we are iterating through.
543     const NameIndex *CurrentIndex;
544 
545     /// The current name in the Name Index.
546     uint32_t CurrentName;
547 
548     void next() {
549       assert(CurrentName <= CurrentIndex->getNameCount());
550       ++CurrentName;
551     }
552 
553   public:
554     using iterator_category = std::input_iterator_tag;
555     using value_type = NameTableEntry;
556     using difference_type = uint32_t;
557     using pointer = NameTableEntry *;
558     using reference = NameTableEntry; // We return entries by value.
559 
560     /// Creates an iterator whose initial position is name CurrentName in
561     /// CurrentIndex.
562     NameIterator(const NameIndex *CurrentIndex, uint32_t CurrentName)
563         : CurrentIndex(CurrentIndex), CurrentName(CurrentName) {}
564 
565     NameTableEntry operator*() const {
566       return CurrentIndex->getNameTableEntry(CurrentName);
567     }
568     NameIterator &operator++() {
569       next();
570       return *this;
571     }
572     NameIterator operator++(int) {
573       NameIterator I = *this;
574       next();
575       return I;
576     }
577 
578     friend bool operator==(const NameIterator &A, const NameIterator &B) {
579       return A.CurrentIndex == B.CurrentIndex && A.CurrentName == B.CurrentName;
580     }
581     friend bool operator!=(const NameIterator &A, const NameIterator &B) {
582       return !(A == B);
583     }
584   };
585 
586 private:
587   SmallVector<NameIndex, 0> NameIndices;
588   DenseMap<uint64_t, const NameIndex *> CUToNameIndex;
589 
590 public:
591   DWARFDebugNames(const DWARFDataExtractor &AccelSection,
592                   DataExtractor StringSection)
593       : DWARFAcceleratorTable(AccelSection, StringSection) {}
594 
595   Error extract() override;
596   void dump(raw_ostream &OS) const override;
597 
598   /// Look up all entries in the accelerator table matching \c Key.
599   iterator_range<ValueIterator> equal_range(StringRef Key) const;
600 
601   using const_iterator = SmallVector<NameIndex, 0>::const_iterator;
602   const_iterator begin() const { return NameIndices.begin(); }
603   const_iterator end() const { return NameIndices.end(); }
604 
605   /// Return the Name Index covering the compile unit at CUOffset, or nullptr if
606   /// there is no Name Index covering that unit.
607   const NameIndex *getCUNameIndex(uint64_t CUOffset);
608 };
609 
610 } // end namespace llvm
611 
612 #endif // LLVM_DEBUGINFO_DWARF_DWARFACCELERATORTABLE_H
613