1 //===- DWARFAcceleratorTable.h ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
10 #define LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
11 
12 #include "llvm/ADT/DenseSet.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/BinaryFormat/Dwarf.h"
15 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include <cstdint>
18 #include <utility>
19 
20 namespace llvm {
21 
22 class raw_ostream;
23 class ScopedPrinter;
24 
25 /// The accelerator tables are designed to allow efficient random access
26 /// (using a symbol name as a key) into debug info by providing an index of the
27 /// debug info DIEs. This class implements the common functionality of Apple and
28 /// DWARF 5 accelerator tables.
29 /// TODO: Generalize the rest of the AppleAcceleratorTable interface and move it
30 /// to this class.
31 class DWARFAcceleratorTable {
32 protected:
33   DWARFDataExtractor AccelSection;
34   DataExtractor StringSection;
35 
36 public:
37   /// An abstract class representing a single entry in the accelerator tables.
38   class Entry {
39   protected:
40     SmallVector<DWARFFormValue, 3> Values;
41 
42     Entry() = default;
43 
44     // Make these protected so only (final) subclasses can be copied around.
45     Entry(const Entry &) = default;
46     Entry(Entry &&) = default;
47     Entry &operator=(const Entry &) = default;
48     Entry &operator=(Entry &&) = default;
49     ~Entry() = default;
50 
51 
52   public:
53     /// Returns the Offset of the Compilation Unit associated with this
54     /// Accelerator Entry or None if the Compilation Unit offset is not recorded
55     /// in this Accelerator Entry.
56     virtual Optional<uint64_t> getCUOffset() const = 0;
57 
58     /// Returns the Tag of the Debug Info Entry associated with this
59     /// Accelerator Entry or None if the Tag is not recorded in this
60     /// Accelerator Entry.
61     virtual Optional<dwarf::Tag> getTag() const = 0;
62 
63     /// Returns the raw values of fields in the Accelerator Entry. In general,
64     /// these can only be interpreted with the help of the metadata in the
65     /// owning Accelerator Table.
66     ArrayRef<DWARFFormValue> getValues() const { return Values; }
67   };
68 
69   DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection,
70                         DataExtractor StringSection)
71       : AccelSection(AccelSection), StringSection(StringSection) {}
72   virtual ~DWARFAcceleratorTable();
73 
74   virtual Error extract() = 0;
75   virtual void dump(raw_ostream &OS) const = 0;
76 
77   DWARFAcceleratorTable(const DWARFAcceleratorTable &) = delete;
78   void operator=(const DWARFAcceleratorTable &) = delete;
79 };
80 
81 /// This implements the Apple accelerator table format, a precursor of the
82 /// DWARF 5 accelerator table format.
83 class AppleAcceleratorTable : public DWARFAcceleratorTable {
84   struct Header {
85     uint32_t Magic;
86     uint16_t Version;
87     uint16_t HashFunction;
88     uint32_t BucketCount;
89     uint32_t HashCount;
90     uint32_t HeaderDataLength;
91 
92     void dump(ScopedPrinter &W) const;
93   };
94 
95   struct HeaderData {
96     using AtomType = uint16_t;
97     using Form = dwarf::Form;
98 
99     uint64_t DIEOffsetBase;
100     SmallVector<std::pair<AtomType, Form>, 3> Atoms;
101 
102     Optional<uint64_t> extractOffset(Optional<DWARFFormValue> Value) const;
103   };
104 
105   struct Header Hdr;
106   struct HeaderData HdrData;
107   bool IsValid = false;
108 
109   /// Returns true if we should continue scanning for entries or false if we've
110   /// reached the last (sentinel) entry of encountered a parsing error.
111   bool dumpName(ScopedPrinter &W, SmallVectorImpl<DWARFFormValue> &AtomForms,
112                 uint64_t *DataOffset) const;
113 
114 public:
115   /// Apple-specific implementation of an Accelerator Entry.
116   class Entry final : public DWARFAcceleratorTable::Entry {
117     const HeaderData *HdrData = nullptr;
118 
119     Entry(const HeaderData &Data);
120     Entry() = default;
121 
122     void extract(const AppleAcceleratorTable &AccelTable, uint64_t *Offset);
123 
124   public:
125     Optional<uint64_t> getCUOffset() const override;
126 
127     /// Returns the Section Offset of the Debug Info Entry associated with this
128     /// Accelerator Entry or None if the DIE offset is not recorded in this
129     /// Accelerator Entry. The returned offset is relative to the start of the
130     /// Section containing the DIE.
131     Optional<uint64_t> getDIESectionOffset() const;
132 
133     Optional<dwarf::Tag> getTag() const override;
134 
135     /// Returns the value of the Atom in this Accelerator Entry, if the Entry
136     /// contains such Atom.
137     Optional<DWARFFormValue> lookup(HeaderData::AtomType Atom) const;
138 
139     friend class AppleAcceleratorTable;
140     friend class ValueIterator;
141   };
142 
143   class ValueIterator : public std::iterator<std::input_iterator_tag, Entry> {
144     const AppleAcceleratorTable *AccelTable = nullptr;
145     Entry Current;           ///< The current entry.
146     uint64_t DataOffset = 0; ///< Offset into the section.
147     unsigned Data = 0; ///< Current data entry.
148     unsigned NumData = 0; ///< Number of data entries.
149 
150     /// Advance the iterator.
151     void Next();
152   public:
153     /// Construct a new iterator for the entries at \p DataOffset.
154     ValueIterator(const AppleAcceleratorTable &AccelTable, uint64_t DataOffset);
155     /// End marker.
156     ValueIterator() = default;
157 
158     const Entry &operator*() const { return Current; }
159     ValueIterator &operator++() { Next(); return *this; }
160     ValueIterator operator++(int) {
161       ValueIterator I = *this;
162       Next();
163       return I;
164     }
165     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
166       return A.NumData == B.NumData && A.DataOffset == B.DataOffset;
167     }
168     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
169       return !(A == B);
170     }
171   };
172 
173   AppleAcceleratorTable(const DWARFDataExtractor &AccelSection,
174                         DataExtractor StringSection)
175       : DWARFAcceleratorTable(AccelSection, StringSection) {}
176 
177   Error extract() override;
178   uint32_t getNumBuckets();
179   uint32_t getNumHashes();
180   uint32_t getSizeHdr();
181   uint32_t getHeaderDataLength();
182 
183   /// Return the Atom description, which can be used to interpret the raw values
184   /// of the Accelerator Entries in this table.
185   ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc();
186   bool validateForms();
187 
188   /// Return information related to the DWARF DIE we're looking for when
189   /// performing a lookup by name.
190   ///
191   /// \param HashDataOffset an offset into the hash data table
192   /// \returns <DieOffset, DieTag>
193   /// DieOffset is the offset into the .debug_info section for the DIE
194   /// related to the input hash data offset.
195   /// DieTag is the tag of the DIE
196   std::pair<uint64_t, dwarf::Tag> readAtoms(uint64_t *HashDataOffset);
197   void dump(raw_ostream &OS) const override;
198 
199   /// Look up all entries in the accelerator table matching \c Key.
200   iterator_range<ValueIterator> equal_range(StringRef Key) const;
201 };
202 
203 /// .debug_names section consists of one or more units. Each unit starts with a
204 /// header, which is followed by a list of compilation units, local and foreign
205 /// type units.
206 ///
207 /// These may be followed by an (optional) hash lookup table, which consists of
208 /// an array of buckets and hashes similar to the apple tables above. The only
209 /// difference is that the hashes array is 1-based, and consequently an empty
210 /// bucket is denoted by 0 and not UINT32_MAX.
211 ///
212 /// Next is the name table, which consists of an array of names and array of
213 /// entry offsets. This is different from the apple tables, which store names
214 /// next to the actual entries.
215 ///
216 /// The structure of the entries is described by an abbreviations table, which
217 /// comes after the name table. Unlike the apple tables, which have a uniform
218 /// entry structure described in the header, each .debug_names entry may have
219 /// different index attributes (DW_IDX_???) attached to it.
220 ///
221 /// The last segment consists of a list of entries, which is a 0-terminated list
222 /// referenced by the name table and interpreted with the help of the
223 /// abbreviation table.
224 class DWARFDebugNames : public DWARFAcceleratorTable {
225   /// The fixed-size part of a DWARF v5 Name Index header
226   struct HeaderPOD {
227     uint32_t UnitLength;
228     uint16_t Version;
229     uint16_t Padding;
230     uint32_t CompUnitCount;
231     uint32_t LocalTypeUnitCount;
232     uint32_t ForeignTypeUnitCount;
233     uint32_t BucketCount;
234     uint32_t NameCount;
235     uint32_t AbbrevTableSize;
236     uint32_t AugmentationStringSize;
237   };
238 
239 public:
240   class NameIndex;
241   class NameIterator;
242   class ValueIterator;
243 
244   /// DWARF v5 Name Index header.
245   struct Header : public HeaderPOD {
246     SmallString<8> AugmentationString;
247 
248     Error extract(const DWARFDataExtractor &AS, uint64_t *Offset);
249     void dump(ScopedPrinter &W) const;
250   };
251 
252   /// Index attribute and its encoding.
253   struct AttributeEncoding {
254     dwarf::Index Index;
255     dwarf::Form Form;
256 
257     constexpr AttributeEncoding(dwarf::Index Index, dwarf::Form Form)
258         : Index(Index), Form(Form) {}
259 
260     friend bool operator==(const AttributeEncoding &LHS,
261                            const AttributeEncoding &RHS) {
262       return LHS.Index == RHS.Index && LHS.Form == RHS.Form;
263     }
264   };
265 
266   /// Abbreviation describing the encoding of Name Index entries.
267   struct Abbrev {
268     uint32_t Code;  ///< Abbreviation code
269     dwarf::Tag Tag; ///< Dwarf Tag of the described entity.
270     std::vector<AttributeEncoding> Attributes; ///< List of index attributes.
271 
272     Abbrev(uint32_t Code, dwarf::Tag Tag,
273            std::vector<AttributeEncoding> Attributes)
274         : Code(Code), Tag(Tag), Attributes(std::move(Attributes)) {}
275 
276     void dump(ScopedPrinter &W) const;
277   };
278 
279   /// DWARF v5-specific implementation of an Accelerator Entry.
280   class Entry final : public DWARFAcceleratorTable::Entry {
281     const NameIndex *NameIdx;
282     const Abbrev *Abbr;
283 
284     Entry(const NameIndex &NameIdx, const Abbrev &Abbr);
285 
286   public:
287     Optional<uint64_t> getCUOffset() const override;
288     Optional<dwarf::Tag> getTag() const override { return tag(); }
289 
290     /// Returns the Index into the Compilation Unit list of the owning Name
291     /// Index or None if this Accelerator Entry does not have an associated
292     /// Compilation Unit. It is up to the user to verify that the returned Index
293     /// is valid in the owning NameIndex (or use getCUOffset(), which will
294     /// handle that check itself). Note that entries in NameIndexes which index
295     /// just a single Compilation Unit are implicitly associated with that unit,
296     /// so this function will return 0 even without an explicit
297     /// DW_IDX_compile_unit attribute.
298     Optional<uint64_t> getCUIndex() const;
299 
300     /// .debug_names-specific getter, which always succeeds (DWARF v5 index
301     /// entries always have a tag).
302     dwarf::Tag tag() const { return Abbr->Tag; }
303 
304     /// Returns the Offset of the DIE within the containing CU or TU.
305     Optional<uint64_t> getDIEUnitOffset() const;
306 
307     /// Return the Abbreviation that can be used to interpret the raw values of
308     /// this Accelerator Entry.
309     const Abbrev &getAbbrev() const { return *Abbr; }
310 
311     /// Returns the value of the Index Attribute in this Accelerator Entry, if
312     /// the Entry contains such Attribute.
313     Optional<DWARFFormValue> lookup(dwarf::Index Index) const;
314 
315     void dump(ScopedPrinter &W) const;
316 
317     friend class NameIndex;
318     friend class ValueIterator;
319   };
320 
321   /// Error returned by NameIndex::getEntry to report it has reached the end of
322   /// the entry list.
323   class SentinelError : public ErrorInfo<SentinelError> {
324   public:
325     static char ID;
326 
327     void log(raw_ostream &OS) const override { OS << "Sentinel"; }
328     std::error_code convertToErrorCode() const override;
329   };
330 
331 private:
332   /// DenseMapInfo for struct Abbrev.
333   struct AbbrevMapInfo {
334     static Abbrev getEmptyKey();
335     static Abbrev getTombstoneKey();
336     static unsigned getHashValue(uint32_t Code) {
337       return DenseMapInfo<uint32_t>::getHashValue(Code);
338     }
339     static unsigned getHashValue(const Abbrev &Abbr) {
340       return getHashValue(Abbr.Code);
341     }
342     static bool isEqual(uint32_t LHS, const Abbrev &RHS) {
343       return LHS == RHS.Code;
344     }
345     static bool isEqual(const Abbrev &LHS, const Abbrev &RHS) {
346       return LHS.Code == RHS.Code;
347     }
348   };
349 
350 public:
351   /// A single entry in the Name Table (DWARF v5 sect. 6.1.1.4.6) of the Name
352   /// Index.
353   class NameTableEntry {
354     DataExtractor StrData;
355 
356     uint32_t Index;
357     uint64_t StringOffset;
358     uint64_t EntryOffset;
359 
360   public:
361     NameTableEntry(const DataExtractor &StrData, uint32_t Index,
362                    uint64_t StringOffset, uint64_t EntryOffset)
363         : StrData(StrData), Index(Index), StringOffset(StringOffset),
364           EntryOffset(EntryOffset) {}
365 
366     /// Return the index of this name in the parent Name Index.
367     uint32_t getIndex() const { return Index; }
368 
369     /// Returns the offset of the name of the described entities.
370     uint64_t getStringOffset() const { return StringOffset; }
371 
372     /// Return the string referenced by this name table entry or nullptr if the
373     /// string offset is not valid.
374     const char *getString() const {
375       uint64_t Off = StringOffset;
376       return StrData.getCStr(&Off);
377     }
378 
379     /// Returns the offset of the first Entry in the list.
380     uint64_t getEntryOffset() const { return EntryOffset; }
381   };
382 
383   /// Represents a single accelerator table within the DWARF v5 .debug_names
384   /// section.
385   class NameIndex {
386     DenseSet<Abbrev, AbbrevMapInfo> Abbrevs;
387     struct Header Hdr;
388     const DWARFDebugNames &Section;
389 
390     // Base of the whole unit and of various important tables, as offsets from
391     // the start of the section.
392     uint64_t Base;
393     uint64_t CUsBase;
394     uint64_t BucketsBase;
395     uint64_t HashesBase;
396     uint64_t StringOffsetsBase;
397     uint64_t EntryOffsetsBase;
398     uint64_t EntriesBase;
399 
400     void dumpCUs(ScopedPrinter &W) const;
401     void dumpLocalTUs(ScopedPrinter &W) const;
402     void dumpForeignTUs(ScopedPrinter &W) const;
403     void dumpAbbreviations(ScopedPrinter &W) const;
404     bool dumpEntry(ScopedPrinter &W, uint64_t *Offset) const;
405     void dumpName(ScopedPrinter &W, const NameTableEntry &NTE,
406                   Optional<uint32_t> Hash) const;
407     void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const;
408 
409     Expected<AttributeEncoding> extractAttributeEncoding(uint64_t *Offset);
410 
411     Expected<std::vector<AttributeEncoding>>
412     extractAttributeEncodings(uint64_t *Offset);
413 
414     Expected<Abbrev> extractAbbrev(uint64_t *Offset);
415 
416   public:
417     NameIndex(const DWARFDebugNames &Section, uint64_t Base)
418         : Section(Section), Base(Base) {}
419 
420     /// Reads offset of compilation unit CU. CU is 0-based.
421     uint64_t getCUOffset(uint32_t CU) const;
422     uint32_t getCUCount() const { return Hdr.CompUnitCount; }
423 
424     /// Reads offset of local type unit TU, TU is 0-based.
425     uint64_t getLocalTUOffset(uint32_t TU) const;
426     uint32_t getLocalTUCount() const { return Hdr.LocalTypeUnitCount; }
427 
428     /// Reads signature of foreign type unit TU. TU is 0-based.
429     uint64_t getForeignTUSignature(uint32_t TU) const;
430     uint32_t getForeignTUCount() const { return Hdr.ForeignTypeUnitCount; }
431 
432     /// Reads an entry in the Bucket Array for the given Bucket. The returned
433     /// value is a (1-based) index into the Names, StringOffsets and
434     /// EntryOffsets arrays. The input Bucket index is 0-based.
435     uint32_t getBucketArrayEntry(uint32_t Bucket) const;
436     uint32_t getBucketCount() const { return Hdr.BucketCount; }
437 
438     /// Reads an entry in the Hash Array for the given Index. The input Index
439     /// is 1-based.
440     uint32_t getHashArrayEntry(uint32_t Index) const;
441 
442     /// Reads an entry in the Name Table for the given Index. The Name Table
443     /// consists of two arrays -- String Offsets and Entry Offsets. The returned
444     /// offsets are relative to the starts of respective sections. Input Index
445     /// is 1-based.
446     NameTableEntry getNameTableEntry(uint32_t Index) const;
447 
448     uint32_t getNameCount() const { return Hdr.NameCount; }
449 
450     const DenseSet<Abbrev, AbbrevMapInfo> &getAbbrevs() const {
451       return Abbrevs;
452     }
453 
454     Expected<Entry> getEntry(uint64_t *Offset) const;
455 
456     /// Look up all entries in this Name Index matching \c Key.
457     iterator_range<ValueIterator> equal_range(StringRef Key) const;
458 
459     NameIterator begin() const { return NameIterator(this, 1); }
460     NameIterator end() const { return NameIterator(this, getNameCount() + 1); }
461 
462     Error extract();
463     uint64_t getUnitOffset() const { return Base; }
464     uint64_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; }
465     void dump(ScopedPrinter &W) const;
466 
467     friend class DWARFDebugNames;
468   };
469 
470   class ValueIterator : public std::iterator<std::input_iterator_tag, Entry> {
471 
472     /// The Name Index we are currently iterating through. The implementation
473     /// relies on the fact that this can also be used as an iterator into the
474     /// "NameIndices" vector in the Accelerator section.
475     const NameIndex *CurrentIndex = nullptr;
476 
477     /// Whether this is a local iterator (searches in CurrentIndex only) or not
478     /// (searches all name indices).
479     bool IsLocal;
480 
481     Optional<Entry> CurrentEntry;
482     uint64_t DataOffset = 0; ///< Offset into the section.
483     std::string Key;         ///< The Key we are searching for.
484     Optional<uint32_t> Hash; ///< Hash of Key, if it has been computed.
485 
486     bool getEntryAtCurrentOffset();
487     Optional<uint64_t> findEntryOffsetInCurrentIndex();
488     bool findInCurrentIndex();
489     void searchFromStartOfCurrentIndex();
490     void next();
491 
492     /// Set the iterator to the "end" state.
493     void setEnd() { *this = ValueIterator(); }
494 
495   public:
496     /// Create a "begin" iterator for looping over all entries in the
497     /// accelerator table matching Key. The iterator will run through all Name
498     /// Indexes in the section in sequence.
499     ValueIterator(const DWARFDebugNames &AccelTable, StringRef Key);
500 
501     /// Create a "begin" iterator for looping over all entries in a specific
502     /// Name Index. Other indices in the section will not be visited.
503     ValueIterator(const NameIndex &NI, StringRef Key);
504 
505     /// End marker.
506     ValueIterator() = default;
507 
508     const Entry &operator*() const { return *CurrentEntry; }
509     ValueIterator &operator++() {
510       next();
511       return *this;
512     }
513     ValueIterator operator++(int) {
514       ValueIterator I = *this;
515       next();
516       return I;
517     }
518 
519     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
520       return A.CurrentIndex == B.CurrentIndex && A.DataOffset == B.DataOffset;
521     }
522     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
523       return !(A == B);
524     }
525   };
526 
527   class NameIterator {
528 
529     /// The Name Index we are iterating through.
530     const NameIndex *CurrentIndex;
531 
532     /// The current name in the Name Index.
533     uint32_t CurrentName;
534 
535     void next() {
536       assert(CurrentName <= CurrentIndex->getNameCount());
537       ++CurrentName;
538     }
539 
540   public:
541     using iterator_category = std::input_iterator_tag;
542     using value_type = NameTableEntry;
543     using difference_type = uint32_t;
544     using pointer = NameTableEntry *;
545     using reference = NameTableEntry; // We return entries by value.
546 
547     /// Creates an iterator whose initial position is name CurrentName in
548     /// CurrentIndex.
549     NameIterator(const NameIndex *CurrentIndex, uint32_t CurrentName)
550         : CurrentIndex(CurrentIndex), CurrentName(CurrentName) {}
551 
552     NameTableEntry operator*() const {
553       return CurrentIndex->getNameTableEntry(CurrentName);
554     }
555     NameIterator &operator++() {
556       next();
557       return *this;
558     }
559     NameIterator operator++(int) {
560       NameIterator I = *this;
561       next();
562       return I;
563     }
564 
565     friend bool operator==(const NameIterator &A, const NameIterator &B) {
566       return A.CurrentIndex == B.CurrentIndex && A.CurrentName == B.CurrentName;
567     }
568     friend bool operator!=(const NameIterator &A, const NameIterator &B) {
569       return !(A == B);
570     }
571   };
572 
573 private:
574   SmallVector<NameIndex, 0> NameIndices;
575   DenseMap<uint64_t, const NameIndex *> CUToNameIndex;
576 
577 public:
578   DWARFDebugNames(const DWARFDataExtractor &AccelSection,
579                   DataExtractor StringSection)
580       : DWARFAcceleratorTable(AccelSection, StringSection) {}
581 
582   Error extract() override;
583   void dump(raw_ostream &OS) const override;
584 
585   /// Look up all entries in the accelerator table matching \c Key.
586   iterator_range<ValueIterator> equal_range(StringRef Key) const;
587 
588   using const_iterator = SmallVector<NameIndex, 0>::const_iterator;
589   const_iterator begin() const { return NameIndices.begin(); }
590   const_iterator end() const { return NameIndices.end(); }
591 
592   /// Return the Name Index covering the compile unit at CUOffset, or nullptr if
593   /// there is no Name Index covering that unit.
594   const NameIndex *getCUNameIndex(uint64_t CUOffset);
595 };
596 
597 } // end namespace llvm
598 
599 #endif // LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
600