1 //===- DWARFAcceleratorTable.h ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
10 #define LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
11 
12 #include "llvm/ADT/DenseSet.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/BinaryFormat/Dwarf.h"
15 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include <cstdint>
18 #include <utility>
19 
20 namespace llvm {
21 
22 class raw_ostream;
23 class ScopedPrinter;
24 
25 /// The accelerator tables are designed to allow efficient random access
26 /// (using a symbol name as a key) into debug info by providing an index of the
27 /// debug info DIEs. This class implements the common functionality of Apple and
28 /// DWARF 5 accelerator tables.
29 /// TODO: Generalize the rest of the AppleAcceleratorTable interface and move it
30 /// to this class.
31 class DWARFAcceleratorTable {
32 protected:
33   DWARFDataExtractor AccelSection;
34   DataExtractor StringSection;
35 
36 public:
37   /// An abstract class representing a single entry in the accelerator tables.
38   class Entry {
39   protected:
40     SmallVector<DWARFFormValue, 3> Values;
41 
42     Entry() = default;
43 
44     // Make these protected so only (final) subclasses can be copied around.
45     Entry(const Entry &) = default;
46     Entry(Entry &&) = default;
47     Entry &operator=(const Entry &) = default;
48     Entry &operator=(Entry &&) = default;
49     ~Entry() = default;
50 
51 
52   public:
53     /// Returns the Offset of the Compilation Unit associated with this
54     /// Accelerator Entry or None if the Compilation Unit offset is not recorded
55     /// in this Accelerator Entry.
56     virtual Optional<uint64_t> getCUOffset() const = 0;
57 
58     /// Returns the Tag of the Debug Info Entry associated with this
59     /// Accelerator Entry or None if the Tag is not recorded in this
60     /// Accelerator Entry.
61     virtual Optional<dwarf::Tag> getTag() const = 0;
62 
63     /// Returns the raw values of fields in the Accelerator Entry. In general,
64     /// these can only be interpreted with the help of the metadata in the
65     /// owning Accelerator Table.
66     ArrayRef<DWARFFormValue> getValues() const { return Values; }
67   };
68 
69   DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection,
70                         DataExtractor StringSection)
71       : AccelSection(AccelSection), StringSection(StringSection) {}
72   virtual ~DWARFAcceleratorTable();
73 
74   virtual Error extract() = 0;
75   virtual void dump(raw_ostream &OS) const = 0;
76 
77   DWARFAcceleratorTable(const DWARFAcceleratorTable &) = delete;
78   void operator=(const DWARFAcceleratorTable &) = delete;
79 };
80 
81 /// This implements the Apple accelerator table format, a precursor of the
82 /// DWARF 5 accelerator table format.
83 class AppleAcceleratorTable : public DWARFAcceleratorTable {
84   struct Header {
85     uint32_t Magic;
86     uint16_t Version;
87     uint16_t HashFunction;
88     uint32_t BucketCount;
89     uint32_t HashCount;
90     uint32_t HeaderDataLength;
91 
92     void dump(ScopedPrinter &W) const;
93   };
94 
95   struct HeaderData {
96     using AtomType = uint16_t;
97     using Form = dwarf::Form;
98 
99     uint64_t DIEOffsetBase;
100     SmallVector<std::pair<AtomType, Form>, 3> Atoms;
101 
102     Optional<uint64_t> extractOffset(Optional<DWARFFormValue> Value) const;
103   };
104 
105   struct Header Hdr;
106   struct HeaderData HdrData;
107   bool IsValid = false;
108 
109   /// Returns true if we should continue scanning for entries or false if we've
110   /// reached the last (sentinel) entry of encountered a parsing error.
111   bool dumpName(ScopedPrinter &W, SmallVectorImpl<DWARFFormValue> &AtomForms,
112                 uint64_t *DataOffset) const;
113 
114 public:
115   /// Apple-specific implementation of an Accelerator Entry.
116   class Entry final : public DWARFAcceleratorTable::Entry {
117     const HeaderData *HdrData = nullptr;
118 
119     Entry(const HeaderData &Data);
120     Entry() = default;
121 
122     void extract(const AppleAcceleratorTable &AccelTable, uint64_t *Offset);
123 
124   public:
125     Optional<uint64_t> getCUOffset() const override;
126 
127     /// Returns the Section Offset of the Debug Info Entry associated with this
128     /// Accelerator Entry or None if the DIE offset is not recorded in this
129     /// Accelerator Entry. The returned offset is relative to the start of the
130     /// Section containing the DIE.
131     Optional<uint64_t> getDIESectionOffset() const;
132 
133     Optional<dwarf::Tag> getTag() const override;
134 
135     /// Returns the value of the Atom in this Accelerator Entry, if the Entry
136     /// contains such Atom.
137     Optional<DWARFFormValue> lookup(HeaderData::AtomType Atom) const;
138 
139     friend class AppleAcceleratorTable;
140     friend class ValueIterator;
141   };
142 
143   class ValueIterator : public std::iterator<std::input_iterator_tag, Entry> {
144     const AppleAcceleratorTable *AccelTable = nullptr;
145     Entry Current;           ///< The current entry.
146     uint64_t DataOffset = 0; ///< Offset into the section.
147     unsigned Data = 0; ///< Current data entry.
148     unsigned NumData = 0; ///< Number of data entries.
149 
150     /// Advance the iterator.
151     void Next();
152   public:
153     /// Construct a new iterator for the entries at \p DataOffset.
154     ValueIterator(const AppleAcceleratorTable &AccelTable, uint64_t DataOffset);
155     /// End marker.
156     ValueIterator() = default;
157 
158     const Entry &operator*() const { return Current; }
159     ValueIterator &operator++() { Next(); return *this; }
160     ValueIterator operator++(int) {
161       ValueIterator I = *this;
162       Next();
163       return I;
164     }
165     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
166       return A.NumData == B.NumData && A.DataOffset == B.DataOffset;
167     }
168     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
169       return !(A == B);
170     }
171   };
172 
173   AppleAcceleratorTable(const DWARFDataExtractor &AccelSection,
174                         DataExtractor StringSection)
175       : DWARFAcceleratorTable(AccelSection, StringSection) {}
176 
177   Error extract() override;
178   uint32_t getNumBuckets();
179   uint32_t getNumHashes();
180   uint32_t getSizeHdr();
181   uint32_t getHeaderDataLength();
182 
183   /// Return the Atom description, which can be used to interpret the raw values
184   /// of the Accelerator Entries in this table.
185   ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc();
186   bool validateForms();
187 
188   /// Return information related to the DWARF DIE we're looking for when
189   /// performing a lookup by name.
190   ///
191   /// \param HashDataOffset an offset into the hash data table
192   /// \returns <DieOffset, DieTag>
193   /// DieOffset is the offset into the .debug_info section for the DIE
194   /// related to the input hash data offset.
195   /// DieTag is the tag of the DIE
196   std::pair<uint64_t, dwarf::Tag> readAtoms(uint64_t *HashDataOffset);
197   void dump(raw_ostream &OS) const override;
198 
199   /// Look up all entries in the accelerator table matching \c Key.
200   iterator_range<ValueIterator> equal_range(StringRef Key) const;
201 };
202 
203 /// .debug_names section consists of one or more units. Each unit starts with a
204 /// header, which is followed by a list of compilation units, local and foreign
205 /// type units.
206 ///
207 /// These may be followed by an (optional) hash lookup table, which consists of
208 /// an array of buckets and hashes similar to the apple tables above. The only
209 /// difference is that the hashes array is 1-based, and consequently an empty
210 /// bucket is denoted by 0 and not UINT32_MAX.
211 ///
212 /// Next is the name table, which consists of an array of names and array of
213 /// entry offsets. This is different from the apple tables, which store names
214 /// next to the actual entries.
215 ///
216 /// The structure of the entries is described by an abbreviations table, which
217 /// comes after the name table. Unlike the apple tables, which have a uniform
218 /// entry structure described in the header, each .debug_names entry may have
219 /// different index attributes (DW_IDX_???) attached to it.
220 ///
221 /// The last segment consists of a list of entries, which is a 0-terminated list
222 /// referenced by the name table and interpreted with the help of the
223 /// abbreviation table.
224 class DWARFDebugNames : public DWARFAcceleratorTable {
225 public:
226   class NameIndex;
227   class NameIterator;
228   class ValueIterator;
229 
230   /// DWARF v5 Name Index header.
231   struct Header {
232     uint64_t UnitLength;
233     dwarf::DwarfFormat Format;
234     uint16_t Version;
235     uint32_t CompUnitCount;
236     uint32_t LocalTypeUnitCount;
237     uint32_t ForeignTypeUnitCount;
238     uint32_t BucketCount;
239     uint32_t NameCount;
240     uint32_t AbbrevTableSize;
241     uint32_t AugmentationStringSize;
242     SmallString<8> AugmentationString;
243 
244     Error extract(const DWARFDataExtractor &AS, uint64_t *Offset);
245     void dump(ScopedPrinter &W) const;
246   };
247 
248   /// Index attribute and its encoding.
249   struct AttributeEncoding {
250     dwarf::Index Index;
251     dwarf::Form Form;
252 
253     constexpr AttributeEncoding(dwarf::Index Index, dwarf::Form Form)
254         : Index(Index), Form(Form) {}
255 
256     friend bool operator==(const AttributeEncoding &LHS,
257                            const AttributeEncoding &RHS) {
258       return LHS.Index == RHS.Index && LHS.Form == RHS.Form;
259     }
260   };
261 
262   /// Abbreviation describing the encoding of Name Index entries.
263   struct Abbrev {
264     uint32_t Code;  ///< Abbreviation code
265     dwarf::Tag Tag; ///< Dwarf Tag of the described entity.
266     std::vector<AttributeEncoding> Attributes; ///< List of index attributes.
267 
268     Abbrev(uint32_t Code, dwarf::Tag Tag,
269            std::vector<AttributeEncoding> Attributes)
270         : Code(Code), Tag(Tag), Attributes(std::move(Attributes)) {}
271 
272     void dump(ScopedPrinter &W) const;
273   };
274 
275   /// DWARF v5-specific implementation of an Accelerator Entry.
276   class Entry final : public DWARFAcceleratorTable::Entry {
277     const NameIndex *NameIdx;
278     const Abbrev *Abbr;
279 
280     Entry(const NameIndex &NameIdx, const Abbrev &Abbr);
281 
282   public:
283     Optional<uint64_t> getCUOffset() const override;
284     Optional<dwarf::Tag> getTag() const override { return tag(); }
285 
286     /// Returns the Index into the Compilation Unit list of the owning Name
287     /// Index or None if this Accelerator Entry does not have an associated
288     /// Compilation Unit. It is up to the user to verify that the returned Index
289     /// is valid in the owning NameIndex (or use getCUOffset(), which will
290     /// handle that check itself). Note that entries in NameIndexes which index
291     /// just a single Compilation Unit are implicitly associated with that unit,
292     /// so this function will return 0 even without an explicit
293     /// DW_IDX_compile_unit attribute.
294     Optional<uint64_t> getCUIndex() const;
295 
296     /// .debug_names-specific getter, which always succeeds (DWARF v5 index
297     /// entries always have a tag).
298     dwarf::Tag tag() const { return Abbr->Tag; }
299 
300     /// Returns the Offset of the DIE within the containing CU or TU.
301     Optional<uint64_t> getDIEUnitOffset() const;
302 
303     /// Return the Abbreviation that can be used to interpret the raw values of
304     /// this Accelerator Entry.
305     const Abbrev &getAbbrev() const { return *Abbr; }
306 
307     /// Returns the value of the Index Attribute in this Accelerator Entry, if
308     /// the Entry contains such Attribute.
309     Optional<DWARFFormValue> lookup(dwarf::Index Index) const;
310 
311     void dump(ScopedPrinter &W) const;
312 
313     friend class NameIndex;
314     friend class ValueIterator;
315   };
316 
317   /// Error returned by NameIndex::getEntry to report it has reached the end of
318   /// the entry list.
319   class SentinelError : public ErrorInfo<SentinelError> {
320   public:
321     static char ID;
322 
323     void log(raw_ostream &OS) const override { OS << "Sentinel"; }
324     std::error_code convertToErrorCode() const override;
325   };
326 
327 private:
328   /// DenseMapInfo for struct Abbrev.
329   struct AbbrevMapInfo {
330     static Abbrev getEmptyKey();
331     static Abbrev getTombstoneKey();
332     static unsigned getHashValue(uint32_t Code) {
333       return DenseMapInfo<uint32_t>::getHashValue(Code);
334     }
335     static unsigned getHashValue(const Abbrev &Abbr) {
336       return getHashValue(Abbr.Code);
337     }
338     static bool isEqual(uint32_t LHS, const Abbrev &RHS) {
339       return LHS == RHS.Code;
340     }
341     static bool isEqual(const Abbrev &LHS, const Abbrev &RHS) {
342       return LHS.Code == RHS.Code;
343     }
344   };
345 
346 public:
347   /// A single entry in the Name Table (DWARF v5 sect. 6.1.1.4.6) of the Name
348   /// Index.
349   class NameTableEntry {
350     DataExtractor StrData;
351 
352     uint32_t Index;
353     uint64_t StringOffset;
354     uint64_t EntryOffset;
355 
356   public:
357     NameTableEntry(const DataExtractor &StrData, uint32_t Index,
358                    uint64_t StringOffset, uint64_t EntryOffset)
359         : StrData(StrData), Index(Index), StringOffset(StringOffset),
360           EntryOffset(EntryOffset) {}
361 
362     /// Return the index of this name in the parent Name Index.
363     uint32_t getIndex() const { return Index; }
364 
365     /// Returns the offset of the name of the described entities.
366     uint64_t getStringOffset() const { return StringOffset; }
367 
368     /// Return the string referenced by this name table entry or nullptr if the
369     /// string offset is not valid.
370     const char *getString() const {
371       uint64_t Off = StringOffset;
372       return StrData.getCStr(&Off);
373     }
374 
375     /// Returns the offset of the first Entry in the list.
376     uint64_t getEntryOffset() const { return EntryOffset; }
377   };
378 
379   /// Represents a single accelerator table within the DWARF v5 .debug_names
380   /// section.
381   class NameIndex {
382     DenseSet<Abbrev, AbbrevMapInfo> Abbrevs;
383     struct Header Hdr;
384     const DWARFDebugNames &Section;
385 
386     // Base of the whole unit and of various important tables, as offsets from
387     // the start of the section.
388     uint64_t Base;
389     uint64_t CUsBase;
390     uint64_t BucketsBase;
391     uint64_t HashesBase;
392     uint64_t StringOffsetsBase;
393     uint64_t EntryOffsetsBase;
394     uint64_t EntriesBase;
395 
396     void dumpCUs(ScopedPrinter &W) const;
397     void dumpLocalTUs(ScopedPrinter &W) const;
398     void dumpForeignTUs(ScopedPrinter &W) const;
399     void dumpAbbreviations(ScopedPrinter &W) const;
400     bool dumpEntry(ScopedPrinter &W, uint64_t *Offset) const;
401     void dumpName(ScopedPrinter &W, const NameTableEntry &NTE,
402                   Optional<uint32_t> Hash) const;
403     void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const;
404 
405     Expected<AttributeEncoding> extractAttributeEncoding(uint64_t *Offset);
406 
407     Expected<std::vector<AttributeEncoding>>
408     extractAttributeEncodings(uint64_t *Offset);
409 
410     Expected<Abbrev> extractAbbrev(uint64_t *Offset);
411 
412   public:
413     NameIndex(const DWARFDebugNames &Section, uint64_t Base)
414         : Section(Section), Base(Base) {}
415 
416     /// Reads offset of compilation unit CU. CU is 0-based.
417     uint64_t getCUOffset(uint32_t CU) const;
418     uint32_t getCUCount() const { return Hdr.CompUnitCount; }
419 
420     /// Reads offset of local type unit TU, TU is 0-based.
421     uint64_t getLocalTUOffset(uint32_t TU) const;
422     uint32_t getLocalTUCount() const { return Hdr.LocalTypeUnitCount; }
423 
424     /// Reads signature of foreign type unit TU. TU is 0-based.
425     uint64_t getForeignTUSignature(uint32_t TU) const;
426     uint32_t getForeignTUCount() const { return Hdr.ForeignTypeUnitCount; }
427 
428     /// Reads an entry in the Bucket Array for the given Bucket. The returned
429     /// value is a (1-based) index into the Names, StringOffsets and
430     /// EntryOffsets arrays. The input Bucket index is 0-based.
431     uint32_t getBucketArrayEntry(uint32_t Bucket) const;
432     uint32_t getBucketCount() const { return Hdr.BucketCount; }
433 
434     /// Reads an entry in the Hash Array for the given Index. The input Index
435     /// is 1-based.
436     uint32_t getHashArrayEntry(uint32_t Index) const;
437 
438     /// Reads an entry in the Name Table for the given Index. The Name Table
439     /// consists of two arrays -- String Offsets and Entry Offsets. The returned
440     /// offsets are relative to the starts of respective sections. Input Index
441     /// is 1-based.
442     NameTableEntry getNameTableEntry(uint32_t Index) const;
443 
444     uint32_t getNameCount() const { return Hdr.NameCount; }
445 
446     const DenseSet<Abbrev, AbbrevMapInfo> &getAbbrevs() const {
447       return Abbrevs;
448     }
449 
450     Expected<Entry> getEntry(uint64_t *Offset) const;
451 
452     /// Look up all entries in this Name Index matching \c Key.
453     iterator_range<ValueIterator> equal_range(StringRef Key) const;
454 
455     NameIterator begin() const { return NameIterator(this, 1); }
456     NameIterator end() const { return NameIterator(this, getNameCount() + 1); }
457 
458     Error extract();
459     uint64_t getUnitOffset() const { return Base; }
460     uint64_t getNextUnitOffset() const {
461       return Base + dwarf::getUnitLengthFieldByteSize(Hdr.Format) +
462              Hdr.UnitLength;
463     }
464     void dump(ScopedPrinter &W) const;
465 
466     friend class DWARFDebugNames;
467   };
468 
469   class ValueIterator : public std::iterator<std::input_iterator_tag, Entry> {
470 
471     /// The Name Index we are currently iterating through. The implementation
472     /// relies on the fact that this can also be used as an iterator into the
473     /// "NameIndices" vector in the Accelerator section.
474     const NameIndex *CurrentIndex = nullptr;
475 
476     /// Whether this is a local iterator (searches in CurrentIndex only) or not
477     /// (searches all name indices).
478     bool IsLocal;
479 
480     Optional<Entry> CurrentEntry;
481     uint64_t DataOffset = 0; ///< Offset into the section.
482     std::string Key;         ///< The Key we are searching for.
483     Optional<uint32_t> Hash; ///< Hash of Key, if it has been computed.
484 
485     bool getEntryAtCurrentOffset();
486     Optional<uint64_t> findEntryOffsetInCurrentIndex();
487     bool findInCurrentIndex();
488     void searchFromStartOfCurrentIndex();
489     void next();
490 
491     /// Set the iterator to the "end" state.
492     void setEnd() { *this = ValueIterator(); }
493 
494   public:
495     /// Create a "begin" iterator for looping over all entries in the
496     /// accelerator table matching Key. The iterator will run through all Name
497     /// Indexes in the section in sequence.
498     ValueIterator(const DWARFDebugNames &AccelTable, StringRef Key);
499 
500     /// Create a "begin" iterator for looping over all entries in a specific
501     /// Name Index. Other indices in the section will not be visited.
502     ValueIterator(const NameIndex &NI, StringRef Key);
503 
504     /// End marker.
505     ValueIterator() = default;
506 
507     const Entry &operator*() const { return *CurrentEntry; }
508     ValueIterator &operator++() {
509       next();
510       return *this;
511     }
512     ValueIterator operator++(int) {
513       ValueIterator I = *this;
514       next();
515       return I;
516     }
517 
518     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
519       return A.CurrentIndex == B.CurrentIndex && A.DataOffset == B.DataOffset;
520     }
521     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
522       return !(A == B);
523     }
524   };
525 
526   class NameIterator {
527 
528     /// The Name Index we are iterating through.
529     const NameIndex *CurrentIndex;
530 
531     /// The current name in the Name Index.
532     uint32_t CurrentName;
533 
534     void next() {
535       assert(CurrentName <= CurrentIndex->getNameCount());
536       ++CurrentName;
537     }
538 
539   public:
540     using iterator_category = std::input_iterator_tag;
541     using value_type = NameTableEntry;
542     using difference_type = uint32_t;
543     using pointer = NameTableEntry *;
544     using reference = NameTableEntry; // We return entries by value.
545 
546     /// Creates an iterator whose initial position is name CurrentName in
547     /// CurrentIndex.
548     NameIterator(const NameIndex *CurrentIndex, uint32_t CurrentName)
549         : CurrentIndex(CurrentIndex), CurrentName(CurrentName) {}
550 
551     NameTableEntry operator*() const {
552       return CurrentIndex->getNameTableEntry(CurrentName);
553     }
554     NameIterator &operator++() {
555       next();
556       return *this;
557     }
558     NameIterator operator++(int) {
559       NameIterator I = *this;
560       next();
561       return I;
562     }
563 
564     friend bool operator==(const NameIterator &A, const NameIterator &B) {
565       return A.CurrentIndex == B.CurrentIndex && A.CurrentName == B.CurrentName;
566     }
567     friend bool operator!=(const NameIterator &A, const NameIterator &B) {
568       return !(A == B);
569     }
570   };
571 
572 private:
573   SmallVector<NameIndex, 0> NameIndices;
574   DenseMap<uint64_t, const NameIndex *> CUToNameIndex;
575 
576 public:
577   DWARFDebugNames(const DWARFDataExtractor &AccelSection,
578                   DataExtractor StringSection)
579       : DWARFAcceleratorTable(AccelSection, StringSection) {}
580 
581   Error extract() override;
582   void dump(raw_ostream &OS) const override;
583 
584   /// Look up all entries in the accelerator table matching \c Key.
585   iterator_range<ValueIterator> equal_range(StringRef Key) const;
586 
587   using const_iterator = SmallVector<NameIndex, 0>::const_iterator;
588   const_iterator begin() const { return NameIndices.begin(); }
589   const_iterator end() const { return NameIndices.end(); }
590 
591   /// Return the Name Index covering the compile unit at CUOffset, or nullptr if
592   /// there is no Name Index covering that unit.
593   const NameIndex *getCUNameIndex(uint64_t CUOffset);
594 };
595 
596 } // end namespace llvm
597 
598 #endif // LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
599