1 //===- DwarfTransformer.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <thread>
10 #include <unordered_set>
11 
12 #include "llvm/DebugInfo/DIContext.h"
13 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
15 #include "llvm/Support/Error.h"
16 #include "llvm/Support/ThreadPool.h"
17 #include "llvm/Support/raw_ostream.h"
18 
19 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
20 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
21 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
22 #include "llvm/DebugInfo/GSYM/GsymReader.h"
23 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
24 #include <optional>
25 
26 using namespace llvm;
27 using namespace gsym;
28 
29 struct llvm::gsym::CUInfo {
30   const DWARFDebugLine::LineTable *LineTable;
31   const char *CompDir;
32   std::vector<uint32_t> FileCache;
33   uint64_t Language = 0;
34   uint8_t AddrSize = 0;
35 
CUInfollvm::gsym::CUInfo36   CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) {
37     LineTable = DICtx.getLineTableForUnit(CU);
38     CompDir = CU->getCompilationDir();
39     FileCache.clear();
40     if (LineTable)
41       FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
42     DWARFDie Die = CU->getUnitDIE();
43     Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
44     AddrSize = CU->getAddressByteSize();
45   }
46 
47   /// Return true if Addr is the highest address for a given compile unit. The
48   /// highest address is encoded as -1, of all ones in the address. These high
49   /// addresses are used by some linkers to indicate that a function has been
50   /// dead stripped or didn't end up in the linked executable.
isHighestAddressllvm::gsym::CUInfo51   bool isHighestAddress(uint64_t Addr) const {
52     if (AddrSize == 4)
53       return Addr == UINT32_MAX;
54     else if (AddrSize == 8)
55       return Addr == UINT64_MAX;
56     return false;
57   }
58 
59   /// Convert a DWARF compile unit file index into a GSYM global file index.
60   ///
61   /// Each compile unit in DWARF has its own file table in the line table
62   /// prologue. GSYM has a single large file table that applies to all files
63   /// from all of the info in a GSYM file. This function converts between the
64   /// two and caches and DWARF CU file index that has already been converted so
65   /// the first client that asks for a compile unit file index will end up
66   /// doing the conversion, and subsequent clients will get the cached GSYM
67   /// index.
DWARFToGSYMFileIndexllvm::gsym::CUInfo68   std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
69                                                uint32_t DwarfFileIdx) {
70     if (!LineTable || DwarfFileIdx >= FileCache.size())
71       return std::nullopt;
72     uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
73     if (GsymFileIdx != UINT32_MAX)
74       return GsymFileIdx;
75     std::string File;
76     if (LineTable->getFileNameByIndex(
77             DwarfFileIdx, CompDir,
78             DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
79       GsymFileIdx = Gsym.insertFile(File);
80     else
81       GsymFileIdx = 0;
82     return GsymFileIdx;
83   }
84 };
85 
86 
GetParentDeclContextDIE(DWARFDie & Die)87 static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
88   if (DWARFDie SpecDie =
89           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
90     if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
91       return SpecParent;
92   }
93   if (DWARFDie AbstDie =
94           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
95     if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
96       return AbstParent;
97   }
98 
99   // We never want to follow parent for inlined subroutine - that would
100   // give us information about where the function is inlined, not what
101   // function is inlined
102   if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
103     return DWARFDie();
104 
105   DWARFDie ParentDie = Die.getParent();
106   if (!ParentDie)
107     return DWARFDie();
108 
109   switch (ParentDie.getTag()) {
110   case dwarf::DW_TAG_namespace:
111   case dwarf::DW_TAG_structure_type:
112   case dwarf::DW_TAG_union_type:
113   case dwarf::DW_TAG_class_type:
114   case dwarf::DW_TAG_subprogram:
115     return ParentDie; // Found parent decl context DIE
116   case dwarf::DW_TAG_lexical_block:
117     return GetParentDeclContextDIE(ParentDie);
118   default:
119     break;
120   }
121 
122   return DWARFDie();
123 }
124 
125 /// Get the GsymCreator string table offset for the qualified name for the
126 /// DIE passed in. This function will avoid making copies of any strings in
127 /// the GsymCreator when possible. We don't need to copy a string when the
128 /// string comes from our .debug_str section or is an inlined string in the
129 /// .debug_info. If we create a qualified name string in this function by
130 /// combining multiple strings in the DWARF string table or info, we will make
131 /// a copy of the string when we add it to the string table.
132 static std::optional<uint32_t>
getQualifiedNameIndex(DWARFDie & Die,uint64_t Language,GsymCreator & Gsym)133 getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
134   // If the dwarf has mangled name, use mangled name
135   if (auto LinkageName = Die.getLinkageName()) {
136     // We have seen cases were linkage name is actually empty.
137     if (strlen(LinkageName) > 0)
138       return Gsym.insertString(LinkageName, /* Copy */ false);
139   }
140 
141   StringRef ShortName(Die.getName(DINameKind::ShortName));
142   if (ShortName.empty())
143     return std::nullopt;
144 
145   // For C++ and ObjC, prepend names of all parent declaration contexts
146   if (!(Language == dwarf::DW_LANG_C_plus_plus ||
147         Language == dwarf::DW_LANG_C_plus_plus_03 ||
148         Language == dwarf::DW_LANG_C_plus_plus_11 ||
149         Language == dwarf::DW_LANG_C_plus_plus_14 ||
150         Language == dwarf::DW_LANG_ObjC_plus_plus ||
151         // This should not be needed for C, but we see C++ code marked as C
152         // in some binaries. This should hurt, so let's do it for C as well
153         Language == dwarf::DW_LANG_C))
154     return Gsym.insertString(ShortName, /* Copy */ false);
155 
156   // Some GCC optimizations create functions with names ending with .isra.<num>
157   // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
158   // If it looks like it could be the case, don't add any prefix
159   if (ShortName.starts_with("_Z") &&
160       (ShortName.contains(".isra.") || ShortName.contains(".part.")))
161     return Gsym.insertString(ShortName, /* Copy */ false);
162 
163   DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
164   if (ParentDeclCtxDie) {
165     std::string Name = ShortName.str();
166     while (ParentDeclCtxDie) {
167       StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
168       if (!ParentName.empty()) {
169         // "lambda" names are wrapped in < >. Replace with { }
170         // to be consistent with demangled names and not to confuse with
171         // templates
172         if (ParentName.front() == '<' && ParentName.back() == '>')
173           Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
174                 "::" + Name;
175         else
176           Name = ParentName.str() + "::" + Name;
177       }
178       ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
179     }
180     // Copy the name since we created a new name in a std::string.
181     return Gsym.insertString(Name, /* Copy */ true);
182   }
183   // Don't copy the name since it exists in the DWARF object file.
184   return Gsym.insertString(ShortName, /* Copy */ false);
185 }
186 
hasInlineInfo(DWARFDie Die,uint32_t Depth)187 static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
188   bool CheckChildren = true;
189   switch (Die.getTag()) {
190   case dwarf::DW_TAG_subprogram:
191     // Don't look into functions within functions.
192     CheckChildren = Depth == 0;
193     break;
194   case dwarf::DW_TAG_inlined_subroutine:
195     return true;
196   default:
197     break;
198   }
199   if (!CheckChildren)
200     return false;
201   for (DWARFDie ChildDie : Die.children()) {
202     if (hasInlineInfo(ChildDie, Depth + 1))
203       return true;
204   }
205   return false;
206 }
207 
208 static AddressRanges
ConvertDWARFRanges(const DWARFAddressRangesVector & DwarfRanges)209 ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
210   AddressRanges Ranges;
211   for (const DWARFAddressRange &DwarfRange : DwarfRanges) {
212     if (DwarfRange.LowPC < DwarfRange.HighPC)
213       Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC});
214   }
215   return Ranges;
216 }
217 
parseInlineInfo(GsymCreator & Gsym,raw_ostream * Log,CUInfo & CUI,DWARFDie Die,uint32_t Depth,FunctionInfo & FI,InlineInfo & Parent,const AddressRanges & AllParentRanges,bool & WarnIfEmpty)218 static void parseInlineInfo(GsymCreator &Gsym, raw_ostream *Log, CUInfo &CUI,
219                             DWARFDie Die, uint32_t Depth, FunctionInfo &FI,
220                             InlineInfo &Parent,
221                             const AddressRanges &AllParentRanges,
222                             bool &WarnIfEmpty) {
223   if (!hasInlineInfo(Die, Depth))
224     return;
225 
226   dwarf::Tag Tag = Die.getTag();
227   if (Tag == dwarf::DW_TAG_inlined_subroutine) {
228     // create new InlineInfo and append to parent.children
229     InlineInfo II;
230     AddressRanges AllInlineRanges;
231     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
232     if (RangesOrError) {
233       AllInlineRanges = ConvertDWARFRanges(RangesOrError.get());
234       uint32_t EmptyCount = 0;
235       for (const AddressRange &InlineRange : AllInlineRanges) {
236         // Check for empty inline range in case inline function was outlined
237         // or has not code
238         if (InlineRange.empty()) {
239           ++EmptyCount;
240         } else {
241           if (Parent.Ranges.contains(InlineRange)) {
242             II.Ranges.insert(InlineRange);
243           } else {
244             // Only warn if the current inline range is not within any of all
245             // of the parent ranges. If we have a DW_TAG_subpgram with multiple
246             // ranges we will emit a FunctionInfo for each range of that
247             // function that only emits information within the current range,
248             // so we only want to emit an error if the DWARF has issues, not
249             // when a range currently just isn't in the range we are currently
250             // parsing for.
251             if (AllParentRanges.contains(InlineRange)) {
252               WarnIfEmpty = false;
253             } else if (Log) {
254               *Log << "error: inlined function DIE at "
255                    << HEX32(Die.getOffset()) << " has a range ["
256                    << HEX64(InlineRange.start()) << " - "
257                    << HEX64(InlineRange.end()) << ") that isn't contained in "
258                    << "any parent address ranges, this inline range will be "
259                       "removed.\n";
260             }
261           }
262         }
263       }
264       // If we have all empty ranges for the inlines, then don't warn if we
265       // have an empty InlineInfo at the top level as all inline functions
266       // were elided.
267       if (EmptyCount == AllInlineRanges.size())
268         WarnIfEmpty = false;
269     }
270     if (II.Ranges.empty())
271       return;
272 
273     if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
274       II.Name = *NameIndex;
275     const uint64_t DwarfFileIdx = dwarf::toUnsigned(
276         Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
277     std::optional<uint32_t> OptGSymFileIdx =
278         CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
279     if (OptGSymFileIdx) {
280       II.CallFile = OptGSymFileIdx.value();
281       II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
282       // parse all children and append to parent
283       for (DWARFDie ChildDie : Die.children())
284         parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II,
285                         AllInlineRanges, WarnIfEmpty);
286       Parent.Children.emplace_back(std::move(II));
287     } else if (Log) {
288       *Log << "error: inlined function DIE at " << HEX32(Die.getOffset())
289            << " has an invalid file index " << DwarfFileIdx
290            << " in its DW_AT_call_file attribute, this inline entry and all "
291            << "children will be removed.\n";
292     }
293     return;
294   }
295   if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
296     // skip this Die and just recurse down
297     for (DWARFDie ChildDie : Die.children())
298       parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, Parent,
299                       AllParentRanges, WarnIfEmpty);
300   }
301 }
302 
convertFunctionLineTable(raw_ostream * Log,CUInfo & CUI,DWARFDie Die,GsymCreator & Gsym,FunctionInfo & FI)303 static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
304                                      DWARFDie Die, GsymCreator &Gsym,
305                                      FunctionInfo &FI) {
306   std::vector<uint32_t> RowVector;
307   const uint64_t StartAddress = FI.startAddress();
308   const uint64_t EndAddress = FI.endAddress();
309   const uint64_t RangeSize = EndAddress - StartAddress;
310   const object::SectionedAddress SecAddress{
311       StartAddress, object::SectionedAddress::UndefSection};
312 
313 
314   if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
315     // If we have a DW_TAG_subprogram but no line entries, fall back to using
316     // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
317     std::string FilePath = Die.getDeclFile(
318         DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
319     if (FilePath.empty()) {
320       // If we had a DW_AT_decl_file, but got no file then we need to emit a
321       // warning.
322       if (Log) {
323         const uint64_t DwarfFileIdx = dwarf::toUnsigned(
324             Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
325         *Log << "error: function DIE at " << HEX32(Die.getOffset())
326              << " has an invalid file index " << DwarfFileIdx
327              << " in its DW_AT_decl_file attribute, unable to create a single "
328              << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
329              << "attributes.\n";
330       }
331       return;
332     }
333     if (auto Line =
334             dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
335       LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
336       FI.OptLineTable = LineTable();
337       FI.OptLineTable->push(LE);
338     }
339     return;
340   }
341 
342   FI.OptLineTable = LineTable();
343   DWARFDebugLine::Row PrevRow;
344   for (uint32_t RowIndex : RowVector) {
345     // Take file number and line/column from the row.
346     const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
347     std::optional<uint32_t> OptFileIdx =
348         CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
349     if (!OptFileIdx) {
350       if (Log) {
351         *Log << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
352              << "a line entry with invalid DWARF file index, this entry will "
353              << "be removed:\n";
354         Row.dumpTableHeader(*Log, /*Indent=*/0);
355         Row.dump(*Log);
356         *Log << "\n";
357       }
358       continue;
359     }
360     const uint32_t FileIdx = OptFileIdx.value();
361     uint64_t RowAddress = Row.Address.Address;
362     // Watch out for a RowAddress that is in the middle of a line table entry
363     // in the DWARF. If we pass an address in between two line table entries
364     // we will get a RowIndex for the previous valid line table row which won't
365     // be contained in our function. This is usually a bug in the DWARF due to
366     // linker problems or LTO or other DWARF re-linking so it is worth emitting
367     // an error, but not worth stopping the creation of the GSYM.
368     if (!FI.Range.contains(RowAddress)) {
369       if (RowAddress < FI.Range.start()) {
370         if (Log) {
371           *Log << "error: DIE has a start address whose LowPC is between the "
372                   "line table Row[" << RowIndex << "] with address "
373                << HEX64(RowAddress) << " and the next one.\n";
374           Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
375         }
376         RowAddress = FI.Range.start();
377       } else {
378         continue;
379       }
380     }
381 
382     LineEntry LE(RowAddress, FileIdx, Row.Line);
383     if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
384       // We have seen full duplicate line tables for functions in some
385       // DWARF files. Watch for those here by checking the last
386       // row was the function's end address (HighPC) and that the
387       // current line table entry's address is the same as the first
388       // line entry we already have in our "function_info.Lines". If
389       // so break out after printing a warning.
390       auto FirstLE = FI.OptLineTable->first();
391       if (FirstLE && *FirstLE == LE) {
392         if (Log && !Gsym.isQuiet()) {
393           *Log << "warning: duplicate line table detected for DIE:\n";
394           Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
395         }
396       } else {
397         if (Log) {
398           *Log << "error: line table has addresses that do not "
399                << "monotonically increase:\n";
400           for (uint32_t RowIndex2 : RowVector)
401             CUI.LineTable->Rows[RowIndex2].dump(*Log);
402           Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
403         }
404       }
405       break;
406     }
407 
408     // Skip multiple line entries for the same file and line.
409     auto LastLE = FI.OptLineTable->last();
410     if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
411         continue;
412     // Only push a row if it isn't an end sequence. End sequence markers are
413     // included for the last address in a function or the last contiguous
414     // address in a sequence.
415     if (Row.EndSequence) {
416       // End sequence means that the next line entry could have a lower address
417       // that the previous entries. So we clear the previous row so we don't
418       // trigger the line table error about address that do not monotonically
419       // increase.
420       PrevRow = DWARFDebugLine::Row();
421     } else {
422       FI.OptLineTable->push(LE);
423       PrevRow = Row;
424     }
425   }
426   // If not line table rows were added, clear the line table so we don't encode
427   // on in the GSYM file.
428   if (FI.OptLineTable->empty())
429     FI.OptLineTable = std::nullopt;
430 }
431 
handleDie(raw_ostream * OS,CUInfo & CUI,DWARFDie Die)432 void DwarfTransformer::handleDie(raw_ostream *OS, CUInfo &CUI, DWARFDie Die) {
433   switch (Die.getTag()) {
434   case dwarf::DW_TAG_subprogram: {
435     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
436     if (!RangesOrError) {
437       consumeError(RangesOrError.takeError());
438       break;
439     }
440     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
441     if (Ranges.empty())
442       break;
443     auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
444     if (!NameIndex) {
445       if (OS) {
446         *OS << "error: function at " << HEX64(Die.getOffset())
447             << " has no name\n ";
448         Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
449       }
450       break;
451     }
452     // All ranges for the subprogram DIE in case it has multiple. We need to
453     // pass this down into parseInlineInfo so we don't warn about inline
454     // ranges that are not in the current subrange of a function when they
455     // actually are in another subgrange. We do this because when a function
456     // has discontiguos ranges, we create multiple function entries with only
457     // the info for that range contained inside of it.
458     AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges);
459 
460     // Create a function_info for each range
461     for (const DWARFAddressRange &Range : Ranges) {
462       // The low PC must be less than the high PC. Many linkers don't remove
463       // DWARF for functions that don't get linked into the final executable.
464       // If both the high and low pc have relocations, linkers will often set
465       // the address values for both to the same value to indicate the function
466       // has been remove. Other linkers have been known to set the one or both
467       // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
468       // byte addresses to indicate the function isn't valid. The check below
469       // tries to watch for these cases and abort if it runs into them.
470       if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
471         break;
472 
473       // Many linkers can't remove DWARF and might set the LowPC to zero. Since
474       // high PC can be an offset from the low PC in more recent DWARF versions
475       // we need to watch for a zero'ed low pc which we do using
476       // ValidTextRanges below.
477       if (!Gsym.IsValidTextAddress(Range.LowPC)) {
478         // We expect zero and -1 to be invalid addresses in DWARF depending
479         // on the linker of the DWARF. This indicates a function was stripped
480         // and the debug info wasn't able to be stripped from the DWARF. If
481         // the LowPC isn't zero or -1, then we should emit an error.
482         if (Range.LowPC != 0) {
483           if (!Gsym.isQuiet()) {
484             // Unexpected invalid address, emit a warning
485             if (OS) {
486               *OS << "warning: DIE has an address range whose start address "
487                      "is not in any executable sections ("
488                   << *Gsym.GetValidTextRanges()
489                   << ") and will not be processed:\n";
490               Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
491             }
492           }
493         }
494         break;
495       }
496 
497       FunctionInfo FI;
498       FI.Range = {Range.LowPC, Range.HighPC};
499       FI.Name = *NameIndex;
500       if (CUI.LineTable)
501         convertFunctionLineTable(OS, CUI, Die, Gsym, FI);
502 
503       if (hasInlineInfo(Die, 0)) {
504         FI.Inline = InlineInfo();
505         FI.Inline->Name = *NameIndex;
506         FI.Inline->Ranges.insert(FI.Range);
507         bool WarnIfEmpty = true;
508         parseInlineInfo(Gsym, OS, CUI, Die, 0, FI, *FI.Inline,
509                         AllSubprogramRanges, WarnIfEmpty);
510         // Make sure we at least got some valid inline info other than just
511         // the top level function. If we didn't then remove the inline info
512         // from the function info. We have seen cases where LTO tries to modify
513         // the DWARF for functions and it messes up the address ranges for
514         // the inline functions so it is no longer valid.
515         //
516         // By checking if there are any valid children on the top level inline
517         // information object, we will know if we got anything valid from the
518         // debug info.
519         if (FI.Inline->Children.empty()) {
520           if (WarnIfEmpty && OS && !Gsym.isQuiet()) {
521             *OS << "warning: DIE contains inline function information that has "
522                   "no valid ranges, removing inline information:\n";
523             Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
524           }
525           FI.Inline = std::nullopt;
526         }
527       }
528       Gsym.addFunctionInfo(std::move(FI));
529     }
530   } break;
531   default:
532     break;
533   }
534   for (DWARFDie ChildDie : Die.children())
535     handleDie(OS, CUI, ChildDie);
536 }
537 
convert(uint32_t NumThreads,raw_ostream * OS)538 Error DwarfTransformer::convert(uint32_t NumThreads, raw_ostream *OS) {
539   size_t NumBefore = Gsym.getNumFunctionInfos();
540   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
541     DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
542     if (DwarfUnit.getDWOId()) {
543       DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
544       if (OS && !DWOCU->isDWOUnit()) {
545         std::string DWOName = dwarf::toString(
546             DwarfUnit.getUnitDIE().find(
547                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
548             "");
549         *OS << "warning: Unable to retrieve DWO .debug_info section for "
550             << DWOName << "\n";
551       } else {
552         ReturnDie = DWOCU->getUnitDIE(false);
553       }
554     }
555     return ReturnDie;
556   };
557   if (NumThreads == 1) {
558     // Parse all DWARF data from this thread, use the same string/file table
559     // for everything
560     for (const auto &CU : DICtx.compile_units()) {
561       DWARFDie Die = getDie(*CU);
562       CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
563       handleDie(OS, CUI, Die);
564     }
565   } else {
566     // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
567     // front before we start accessing any DIEs since there might be
568     // cross compile unit references in the DWARF. If we don't do this we can
569     // end up crashing.
570 
571     // We need to call getAbbreviations sequentially first so that getUnitDIE()
572     // only works with its local data.
573     for (const auto &CU : DICtx.compile_units())
574       CU->getAbbreviations();
575 
576     // Now parse all DIEs in case we have cross compile unit references in a
577     // thread pool.
578     ThreadPool pool(hardware_concurrency(NumThreads));
579     for (const auto &CU : DICtx.compile_units())
580       pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
581     pool.wait();
582 
583     // Now convert all DWARF to GSYM in a thread pool.
584     std::mutex LogMutex;
585     for (const auto &CU : DICtx.compile_units()) {
586       DWARFDie Die = getDie(*CU);
587       if (Die) {
588         CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
589         pool.async([this, CUI, &LogMutex, OS, Die]() mutable {
590           std::string ThreadLogStorage;
591           raw_string_ostream ThreadOS(ThreadLogStorage);
592           handleDie(OS ? &ThreadOS: nullptr, CUI, Die);
593           ThreadOS.flush();
594           if (OS && !ThreadLogStorage.empty()) {
595             // Print ThreadLogStorage lines into an actual stream under a lock
596             std::lock_guard<std::mutex> guard(LogMutex);
597             *OS << ThreadLogStorage;
598           }
599         });
600       }
601     }
602     pool.wait();
603   }
604   size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
605   if (OS)
606     *OS << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
607   return Error::success();
608 }
609 
verify(StringRef GsymPath,raw_ostream & Log)610 llvm::Error DwarfTransformer::verify(StringRef GsymPath, raw_ostream &Log) {
611   Log << "Verifying GSYM file \"" << GsymPath << "\":\n";
612 
613   auto Gsym = GsymReader::openFile(GsymPath);
614   if (!Gsym)
615     return Gsym.takeError();
616 
617   auto NumAddrs = Gsym->getNumAddresses();
618   DILineInfoSpecifier DLIS(
619       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
620       DILineInfoSpecifier::FunctionNameKind::LinkageName);
621   std::string gsymFilename;
622   for (uint32_t I = 0; I < NumAddrs; ++I) {
623     auto FuncAddr = Gsym->getAddress(I);
624     if (!FuncAddr)
625         return createStringError(std::errc::invalid_argument,
626                                   "failed to extract address[%i]", I);
627 
628     auto FI = Gsym->getFunctionInfo(*FuncAddr);
629     if (!FI)
630       return createStringError(std::errc::invalid_argument,
631                             "failed to extract function info for address 0x%"
632                             PRIu64, *FuncAddr);
633 
634     for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
635       const object::SectionedAddress SectAddr{
636           Addr, object::SectionedAddress::UndefSection};
637       auto LR = Gsym->lookup(Addr);
638       if (!LR)
639         return LR.takeError();
640 
641       auto DwarfInlineInfos =
642           DICtx.getInliningInfoForAddress(SectAddr, DLIS);
643       uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
644       if (NumDwarfInlineInfos == 0) {
645         DwarfInlineInfos.addFrame(
646             DICtx.getLineInfoForAddress(SectAddr, DLIS));
647       }
648 
649       // Check for 1 entry that has no file and line info
650       if (NumDwarfInlineInfos == 1 &&
651           DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
652         DwarfInlineInfos = DIInliningInfo();
653         NumDwarfInlineInfos = 0;
654       }
655       if (NumDwarfInlineInfos > 0 &&
656           NumDwarfInlineInfos != LR->Locations.size()) {
657         Log << "error: address " << HEX64(Addr) << " has "
658             << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
659             << LR->Locations.size() << "\n";
660         Log << "    " << NumDwarfInlineInfos << " DWARF frames:\n";
661         for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
662           const auto &dii = DwarfInlineInfos.getFrame(Idx);
663           Log << "    [" << Idx << "]: " << dii.FunctionName << " @ "
664               << dii.FileName << ':' << dii.Line << '\n';
665         }
666         Log << "    " << LR->Locations.size() << " GSYM frames:\n";
667         for (size_t Idx = 0, count = LR->Locations.size();
668               Idx < count; ++Idx) {
669           const auto &gii = LR->Locations[Idx];
670           Log << "    [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
671               << '/' << gii.Base << ':' << gii.Line << '\n';
672         }
673         DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS);
674         Gsym->dump(Log, *FI);
675         continue;
676       }
677 
678       for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
679             ++Idx) {
680         const auto &gii = LR->Locations[Idx];
681         if (Idx < NumDwarfInlineInfos) {
682           const auto &dii = DwarfInlineInfos.getFrame(Idx);
683           gsymFilename = LR->getSourceFile(Idx);
684           // Verify function name
685           if (dii.FunctionName.find(gii.Name.str()) != 0)
686             Log << "error: address " << HEX64(Addr) << " DWARF function \""
687                 << dii.FunctionName.c_str()
688                 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
689           // Verify source file path
690           if (dii.FileName != gsymFilename)
691             Log << "error: address " << HEX64(Addr) << " DWARF path \""
692                 << dii.FileName.c_str() << "\" doesn't match GSYM path \""
693                 << gsymFilename.c_str() << "\"\n";
694           // Verify source file line
695           if (dii.Line != gii.Line)
696             Log << "error: address " << HEX64(Addr) << " DWARF line "
697                 << dii.Line << " != GSYM line " << gii.Line << "\n";
698         }
699       }
700     }
701   }
702   return Error::success();
703 }
704