1 //===- DWARFUnit.h ----------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
10 #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
11 
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Support/DataExtractor.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <utility>
#include <vector>
36 
37 namespace llvm {
38 
39 class DWARFAbbreviationDeclarationSet;
40 class DWARFContext;
41 class DWARFDebugAbbrev;
42 class DWARFUnit;
43 
44 /// Base class describing the header of any kind of "unit."  Some information
45 /// is specific to certain unit types.  We separate this class out so we can
46 /// parse the header before deciding what specific kind of unit to construct.
47 class DWARFUnitHeader {
48   // Offset within section.
49   uint64_t Offset = 0;
50   // Version, address size, and DWARF format.
51   dwarf::FormParams FormParams;
52   uint64_t Length = 0;
53   uint64_t AbbrOffset = 0;
54 
55   // For DWO units only.
56   const DWARFUnitIndex::Entry *IndexEntry = nullptr;
57 
58   // For type units only.
59   uint64_t TypeHash = 0;
60   uint64_t TypeOffset = 0;
61 
62   // For v5 split or skeleton compile units only.
63   Optional<uint64_t> DWOId;
64 
65   // Unit type as parsed, or derived from the section kind.
66   uint8_t UnitType = 0;
67 
68   // Size as parsed. uint8_t for compactness.
69   uint8_t Size = 0;
70 
71 public:
72   /// Parse a unit header from \p debug_info starting at \p offset_ptr.
73   bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info,
74                uint64_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO,
75                const DWARFUnitIndex *Index = nullptr,
76                const DWARFUnitIndex::Entry *Entry = nullptr);
77   uint64_t getOffset() const { return Offset; }
78   const dwarf::FormParams &getFormParams() const { return FormParams; }
79   uint16_t getVersion() const { return FormParams.Version; }
80   dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
81   uint8_t getAddressByteSize() const { return FormParams.AddrSize; }
82   uint8_t getRefAddrByteSize() const { return FormParams.getRefAddrByteSize(); }
83   uint8_t getDwarfOffsetByteSize() const {
84     return FormParams.getDwarfOffsetByteSize();
85   }
86   uint64_t getLength() const { return Length; }
87   uint64_t getAbbrOffset() const { return AbbrOffset; }
88   Optional<uint64_t> getDWOId() const { return DWOId; }
89   void setDWOId(uint64_t Id) {
90     assert((!DWOId || *DWOId == Id) && "setting DWOId to a different value");
91     DWOId = Id;
92   }
93   const DWARFUnitIndex::Entry *getIndexEntry() const { return IndexEntry; }
94   uint64_t getTypeHash() const { return TypeHash; }
95   uint64_t getTypeOffset() const { return TypeOffset; }
96   uint8_t getUnitType() const { return UnitType; }
97   bool isTypeUnit() const {
98     return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type;
99   }
100   uint8_t getSize() const { return Size; }
101   uint8_t getUnitLengthFieldByteSize() const {
102     return dwarf::getUnitLengthFieldByteSize(FormParams.Format);
103   }
104   uint64_t getNextUnitOffset() const {
105     return Offset + Length + getUnitLengthFieldByteSize();
106   }
107 };
108 
109 const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
110                                         DWARFSectionKind Kind);
111 
112 /// Describe a collection of units. Intended to hold all units either from
113 /// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo.
114 class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> {
115   std::function<std::unique_ptr<DWARFUnit>(uint64_t, DWARFSectionKind,
116                                            const DWARFSection *,
117                                            const DWARFUnitIndex::Entry *)>
118       Parser;
119   int NumInfoUnits = -1;
120 
121 public:
122   using UnitVector = SmallVectorImpl<std::unique_ptr<DWARFUnit>>;
123   using iterator = typename UnitVector::iterator;
124   using iterator_range = llvm::iterator_range<typename UnitVector::iterator>;
125 
126   DWARFUnit *getUnitForOffset(uint64_t Offset) const;
127   DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
128 
129   /// Read units from a .debug_info or .debug_types section.  Calls made
130   /// before finishedInfoUnits() are assumed to be for .debug_info sections,
131   /// calls after finishedInfoUnits() are for .debug_types sections.  Caller
132   /// must not mix calls to addUnitsForSection and addUnitsForDWOSection.
133   void addUnitsForSection(DWARFContext &C, const DWARFSection &Section,
134                           DWARFSectionKind SectionKind);
135   /// Read units from a .debug_info.dwo or .debug_types.dwo section.  Calls
136   /// made before finishedInfoUnits() are assumed to be for .debug_info.dwo
137   /// sections, calls after finishedInfoUnits() are for .debug_types.dwo
138   /// sections.  Caller must not mix calls to addUnitsForSection and
139   /// addUnitsForDWOSection.
140   void addUnitsForDWOSection(DWARFContext &C, const DWARFSection &DWOSection,
141                              DWARFSectionKind SectionKind, bool Lazy = false);
142 
143   /// Add an existing DWARFUnit to this UnitVector. This is used by the DWARF
144   /// verifier to process unit separately.
145   DWARFUnit *addUnit(std::unique_ptr<DWARFUnit> Unit);
146 
147   /// Returns number of all units held by this instance.
148   unsigned getNumUnits() const { return size(); }
149   /// Returns number of units from all .debug_info[.dwo] sections.
150   unsigned getNumInfoUnits() const {
151     return NumInfoUnits == -1 ? size() : NumInfoUnits;
152   }
153   /// Returns number of units from all .debug_types[.dwo] sections.
154   unsigned getNumTypesUnits() const { return size() - NumInfoUnits; }
155   /// Indicate that parsing .debug_info[.dwo] is done, and remaining units
156   /// will be from .debug_types[.dwo].
157   void finishedInfoUnits() { NumInfoUnits = size(); }
158 
159 private:
160   void addUnitsImpl(DWARFContext &Context, const DWARFObject &Obj,
161                     const DWARFSection &Section, const DWARFDebugAbbrev *DA,
162                     const DWARFSection *RS, const DWARFSection *LocSection,
163                     StringRef SS, const DWARFSection &SOS,
164                     const DWARFSection *AOS, const DWARFSection &LS, bool LE,
165                     bool IsDWO, bool Lazy, DWARFSectionKind SectionKind);
166 };
167 
168 /// Represents base address of the CU.
169 /// Represents a unit's contribution to the string offsets table.
170 struct StrOffsetsContributionDescriptor {
171   uint64_t Base = 0;
172   /// The contribution size not including the header.
173   uint64_t Size = 0;
174   /// Format and version.
175   dwarf::FormParams FormParams = {0, 0, dwarf::DwarfFormat::DWARF32};
176 
177   StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size,
178                                    uint8_t Version, dwarf::DwarfFormat Format)
179       : Base(Base), Size(Size), FormParams({Version, 0, Format}) {}
180   StrOffsetsContributionDescriptor() = default;
181 
182   uint8_t getVersion() const { return FormParams.Version; }
183   dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
184   uint8_t getDwarfOffsetByteSize() const {
185     return FormParams.getDwarfOffsetByteSize();
186   }
187   /// Determine whether a contribution to the string offsets table is
188   /// consistent with the relevant section size and that its length is
189   /// a multiple of the size of one of its entries.
190   Expected<StrOffsetsContributionDescriptor>
191   validateContributionSize(DWARFDataExtractor &DA);
192 };
193 
194 class DWARFUnit {
195   DWARFContext &Context;
196   /// Section containing this DWARFUnit.
197   const DWARFSection &InfoSection;
198 
199   DWARFUnitHeader Header;
200   const DWARFDebugAbbrev *Abbrev;
201   const DWARFSection *RangeSection;
202   uint64_t RangeSectionBase;
203   const DWARFSection *LocSection;
204   uint64_t LocSectionBase;
205 
206   /// Location table of this unit.
207   std::unique_ptr<DWARFLocationTable> LocTable;
208 
209   const DWARFSection &LineSection;
210   StringRef StringSection;
211   const DWARFSection &StringOffsetSection;
212   const DWARFSection *AddrOffsetSection;
213   Optional<uint64_t> AddrOffsetSectionBase;
214   bool isLittleEndian;
215   bool IsDWO;
216   const DWARFUnitVector &UnitVector;
217 
218   /// Start, length, and DWARF format of the unit's contribution to the string
219   /// offsets table (DWARF v5).
220   Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution;
221 
222   /// A table of range lists (DWARF v5 and later).
223   Optional<DWARFDebugRnglistTable> RngListTable;
224   Optional<DWARFListTableHeader> LoclistTableHeader;
225 
226   mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
227   llvm::Optional<object::SectionedAddress> BaseAddr;
228   /// The compile unit debug information entry items.
229   std::vector<DWARFDebugInfoEntry> DieArray;
230 
231   /// Map from range's start address to end address and corresponding DIE.
232   /// IntervalMap does not support range removal, as a result, we use the
233   /// std::map::upper_bound for address range lookup.
234   std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap;
235 
236   using die_iterator_range =
237       iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>;
238 
239   std::shared_ptr<DWARFUnit> DWO;
240 
241   uint32_t getDIEIndex(const DWARFDebugInfoEntry *Die) {
242     auto First = DieArray.data();
243     assert(Die >= First && Die < First + DieArray.size());
244     return Die - First;
245   }
246 
247 protected:
248   const DWARFUnitHeader &getHeader() const { return Header; }
249 
250   /// Size in bytes of the parsed unit header.
251   uint32_t getHeaderSize() const { return Header.getSize(); }
252 
253   /// Find the unit's contribution to the string offsets table and determine its
254   /// length and form. The given offset is expected to be derived from the unit
255   /// DIE's DW_AT_str_offsets_base attribute.
256   Expected<Optional<StrOffsetsContributionDescriptor>>
257   determineStringOffsetsTableContribution(DWARFDataExtractor &DA);
258 
259   /// Find the unit's contribution to the string offsets table and determine its
260   /// length and form. The given offset is expected to be 0 in a dwo file or,
261   /// in a dwp file, the start of the unit's contribution to the string offsets
262   /// table section (as determined by the index table).
263   Expected<Optional<StrOffsetsContributionDescriptor>>
264   determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA);
265 
266 public:
267   DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
268             const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
269             const DWARFSection *RS, const DWARFSection *LocSection,
270             StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
271             const DWARFSection &LS, bool LE, bool IsDWO,
272             const DWARFUnitVector &UnitVector);
273 
274   virtual ~DWARFUnit();
275 
276   bool isDWOUnit() const { return IsDWO; }
277   DWARFContext& getContext() const { return Context; }
278   const DWARFSection &getInfoSection() const { return InfoSection; }
279   uint64_t getOffset() const { return Header.getOffset(); }
280   const dwarf::FormParams &getFormParams() const {
281     return Header.getFormParams();
282   }
283   uint16_t getVersion() const { return Header.getVersion(); }
284   uint8_t getAddressByteSize() const { return Header.getAddressByteSize(); }
285   uint8_t getRefAddrByteSize() const { return Header.getRefAddrByteSize(); }
286   uint8_t getDwarfOffsetByteSize() const {
287     return Header.getDwarfOffsetByteSize();
288   }
289   uint64_t getLength() const { return Header.getLength(); }
290   uint8_t getUnitType() const { return Header.getUnitType(); }
291   bool isTypeUnit() const { return Header.isTypeUnit(); }
292   uint64_t getNextUnitOffset() const { return Header.getNextUnitOffset(); }
293   const DWARFSection &getLineSection() const { return LineSection; }
294   StringRef getStringSection() const { return StringSection; }
295   const DWARFSection &getStringOffsetSection() const {
296     return StringOffsetSection;
297   }
298 
299   void setAddrOffsetSection(const DWARFSection *AOS, uint32_t Base) {
300     AddrOffsetSection = AOS;
301     AddrOffsetSectionBase = Base;
302   }
303 
304   /// Recursively update address to Die map.
305   void updateAddressDieMap(DWARFDie Die);
306 
307   void setRangesSection(const DWARFSection *RS, uint64_t Base) {
308     RangeSection = RS;
309     RangeSectionBase = Base;
310   }
311   void setLocSection(const DWARFSection *LS, uint64_t Base) {
312     LocSection = LS;
313     LocSectionBase = Base;
314   }
315 
316   uint64_t getLocSectionBase() const {
317     return LocSectionBase;
318   }
319 
320   Optional<object::SectionedAddress>
321   getAddrOffsetSectionItem(uint32_t Index) const;
322   Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const;
323 
324   DWARFDataExtractor getDebugInfoExtractor() const;
325 
326   DataExtractor getStringExtractor() const {
327     return DataExtractor(StringSection, false, 0);
328   }
329 
330   const DWARFLocationTable &getLocationTable() { return *LocTable; }
331 
332   /// Extract the range list referenced by this compile unit from the
333   /// .debug_ranges section. If the extraction is unsuccessful, an error
334   /// is returned. Successful extraction requires that the compile unit
335   /// has already been extracted.
336   Error extractRangeList(uint64_t RangeListOffset,
337                          DWARFDebugRangeList &RangeList) const;
338   void clear();
339 
340   const Optional<StrOffsetsContributionDescriptor> &
341   getStringOffsetsTableContribution() const {
342     return StringOffsetsTableContribution;
343   }
344 
345   uint8_t getDwarfStringOffsetsByteSize() const {
346     assert(StringOffsetsTableContribution);
347     return StringOffsetsTableContribution->getDwarfOffsetByteSize();
348   }
349 
350   uint64_t getStringOffsetsBase() const {
351     assert(StringOffsetsTableContribution);
352     return StringOffsetsTableContribution->Base;
353   }
354 
355   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
356 
357   static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
358     switch (UnitType) {
359     case dwarf::DW_UT_compile:
360       return Tag == dwarf::DW_TAG_compile_unit;
361     case dwarf::DW_UT_type:
362       return Tag == dwarf::DW_TAG_type_unit;
363     case dwarf::DW_UT_partial:
364       return Tag == dwarf::DW_TAG_partial_unit;
365     case dwarf::DW_UT_skeleton:
366       return Tag == dwarf::DW_TAG_skeleton_unit;
367     case dwarf::DW_UT_split_compile:
368     case dwarf::DW_UT_split_type:
369       return dwarf::isUnitType(Tag);
370     }
371     return false;
372   }
373 
374   /// Return the number of bytes for the header of a unit of
375   /// UnitType type.
376   ///
377   /// This function must be called with a valid unit type which in
378   /// DWARF5 is defined as one of the following six types.
379   static uint32_t getDWARF5HeaderSize(uint8_t UnitType) {
380     switch (UnitType) {
381     case dwarf::DW_UT_compile:
382     case dwarf::DW_UT_partial:
383       return 12;
384     case dwarf::DW_UT_skeleton:
385     case dwarf::DW_UT_split_compile:
386       return 20;
387     case dwarf::DW_UT_type:
388     case dwarf::DW_UT_split_type:
389       return 24;
390     }
391     llvm_unreachable("Invalid UnitType.");
392   }
393 
394   llvm::Optional<object::SectionedAddress> getBaseAddress();
395 
396   DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
397     extractDIEsIfNeeded(ExtractUnitDIEOnly);
398     if (DieArray.empty())
399       return DWARFDie();
400     return DWARFDie(this, &DieArray[0]);
401   }
402 
403   DWARFDie getNonSkeletonUnitDIE(bool ExtractUnitDIEOnly = true) {
404     parseDWO();
405     if (DWO)
406       return DWO->getUnitDIE(ExtractUnitDIEOnly);
407     return getUnitDIE(ExtractUnitDIEOnly);
408   }
409 
410   const char *getCompilationDir();
411   Optional<uint64_t> getDWOId() {
412     extractDIEsIfNeeded(/*CUDieOnly*/ true);
413     return getHeader().getDWOId();
414   }
415   void setDWOId(uint64_t NewID) { Header.setDWOId(NewID); }
416 
417   /// Return a vector of address ranges resulting from a (possibly encoded)
418   /// range list starting at a given offset in the appropriate ranges section.
419   Expected<DWARFAddressRangesVector> findRnglistFromOffset(uint64_t Offset);
420 
421   /// Return a vector of address ranges retrieved from an encoded range
422   /// list whose offset is found via a table lookup given an index (DWARF v5
423   /// and later).
424   Expected<DWARFAddressRangesVector> findRnglistFromIndex(uint32_t Index);
425 
426   /// Return a rangelist's offset based on an index. The index designates
427   /// an entry in the rangelist table's offset array and is supplied by
428   /// DW_FORM_rnglistx.
429   Optional<uint64_t> getRnglistOffset(uint32_t Index) {
430     if (!RngListTable)
431       return None;
432     if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(Index))
433       return *Off + RangeSectionBase;
434     return None;
435   }
436 
437   Optional<uint64_t> getLoclistOffset(uint32_t Index) {
438     if (!LoclistTableHeader)
439       return None;
440     if (Optional<uint64_t> Off = LoclistTableHeader->getOffsetEntry(Index))
441       return *Off + getLocSectionBase();
442     return None;
443   }
444   Expected<DWARFAddressRangesVector> collectAddressRanges();
445 
446   Expected<DWARFLocationExpressionsVector>
447   findLoclistFromOffset(uint64_t Offset);
448 
449   /// Returns subprogram DIE with address range encompassing the provided
450   /// address. The pointer is alive as long as parsed compile unit DIEs are not
451   /// cleared.
452   DWARFDie getSubroutineForAddress(uint64_t Address);
453 
454   /// getInlinedChainForAddress - fetches inlined chain for a given address.
455   /// Returns empty chain if there is no subprogram containing address. The
456   /// chain is valid as long as parsed compile unit DIEs are not cleared.
457   void getInlinedChainForAddress(uint64_t Address,
458                                  SmallVectorImpl<DWARFDie> &InlinedChain);
459 
460   /// Return the DWARFUnitVector containing this unit.
461   const DWARFUnitVector &getUnitVector() const { return UnitVector; }
462 
463   /// Returns the number of DIEs in the unit. Parses the unit
464   /// if necessary.
465   unsigned getNumDIEs() {
466     extractDIEsIfNeeded(false);
467     return DieArray.size();
468   }
469 
470   /// Return the index of a DIE inside the unit's DIE vector.
471   ///
472   /// It is illegal to call this method with a DIE that hasn't be
473   /// created by this unit. In other word, it's illegal to call this
474   /// method on a DIE that isn't accessible by following
475   /// children/sibling links starting from this unit's getUnitDIE().
476   uint32_t getDIEIndex(const DWARFDie &D) {
477     return getDIEIndex(D.getDebugInfoEntry());
478   }
479 
480   /// Return the DIE object at the given index.
481   DWARFDie getDIEAtIndex(unsigned Index) {
482     assert(Index < DieArray.size());
483     return DWARFDie(this, &DieArray[Index]);
484   }
485 
486   DWARFDie getParent(const DWARFDebugInfoEntry *Die);
487   DWARFDie getSibling(const DWARFDebugInfoEntry *Die);
488   DWARFDie getPreviousSibling(const DWARFDebugInfoEntry *Die);
489   DWARFDie getFirstChild(const DWARFDebugInfoEntry *Die);
490   DWARFDie getLastChild(const DWARFDebugInfoEntry *Die);
491 
492   /// Return the DIE object for a given offset inside the
493   /// unit's DIE vector.
494   ///
495   /// The unit needs to have its DIEs extracted for this method to work.
496   DWARFDie getDIEForOffset(uint64_t Offset) {
497     extractDIEsIfNeeded(false);
498     assert(!DieArray.empty());
499     auto It =
500         llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) {
501           return DIE.getOffset() < Offset;
502         });
503     if (It != DieArray.end() && It->getOffset() == Offset)
504       return DWARFDie(this, &*It);
505     return DWARFDie();
506   }
507 
508   uint32_t getLineTableOffset() const {
509     if (auto IndexEntry = Header.getIndexEntry())
510       if (const auto *Contrib = IndexEntry->getOffset(DW_SECT_LINE))
511         return Contrib->Offset;
512     return 0;
513   }
514 
515   die_iterator_range dies() {
516     extractDIEsIfNeeded(false);
517     return die_iterator_range(DieArray.begin(), DieArray.end());
518   }
519 
520   virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0;
521 
522   Error tryExtractDIEsIfNeeded(bool CUDieOnly);
523 
524 private:
525   /// Size in bytes of the .debug_info data associated with this compile unit.
526   size_t getDebugInfoSize() const {
527     return Header.getLength() + Header.getUnitLengthFieldByteSize() -
528            getHeaderSize();
529   }
530 
531   /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
532   /// hasn't already been done
533   void extractDIEsIfNeeded(bool CUDieOnly);
534 
535   /// extractDIEsToVector - Appends all parsed DIEs to a vector.
536   void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
537                            std::vector<DWARFDebugInfoEntry> &DIEs) const;
538 
539   /// clearDIEs - Clear parsed DIEs to keep memory usage low.
540   void clearDIEs(bool KeepCUDie);
541 
542   /// parseDWO - Parses .dwo file for current compile unit. Returns true if
543   /// it was actually constructed.
544   bool parseDWO();
545 };
546 
547 } // end namespace llvm
548 
549 #endif // LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
550