1 #include "llvm/ADT/DenseMap.h"
2 #include "llvm/ADT/StringExtras.h"
3 #include "llvm/ADT/StringSet.h"
4 #include "llvm/DebugInfo/DIContext.h"
5 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
6 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
7 #include "llvm/Object/ObjectFile.h"
8 
9 #define DEBUG_TYPE "dwarfdump"
10 using namespace llvm;
11 using namespace object;
12 
13 /// Holds statistics for one function (or other entity that has a PC range and
14 /// contains variables, such as a compile unit).
15 struct PerFunctionStats {
16   /// Number of inlined instances of this function.
17   unsigned NumFnInlined = 0;
18   /// Number of variables with location across all inlined instances.
19   unsigned TotalVarWithLoc = 0;
20   /// Number of constants with location across all inlined instances.
21   unsigned ConstantMembers = 0;
22   /// List of all Variables in this function.
23   StringSet<> VarsInFunction;
24   /// Compile units also cover a PC range, but have this flag set to false.
25   bool IsFunction = false;
26 };
27 
28 /// Holds accumulated global statistics about DIEs.
29 struct GlobalStats {
30   /// Total number of PC range bytes covered by DW_AT_locations.
31   unsigned ScopeBytesCovered = 0;
32   /// Total number of PC range bytes in each variable's enclosing scope,
33   /// starting from the first definition of the variable.
34   unsigned ScopeBytesFromFirstDefinition = 0;
35   /// Total number of call site entries (DW_TAG_call_site).
36   unsigned CallSiteEntries = 0;
37   /// Total byte size of concrete functions. This byte size includes
38   /// inline functions contained in the concrete functions.
39   uint64_t FunctionSize = 0;
40   /// Total byte size of inlined functions. This is the total number of bytes
41   /// for the top inline functions within concrete functions. This can help
42   /// tune the inline settings when compiling to match user expectations.
43   uint64_t InlineFunctionSize = 0;
44 };
45 
46 /// Extract the low pc from a Die.
getLowPC(DWARFDie Die)47 static uint64_t getLowPC(DWARFDie Die) {
48   auto RangesOrError = Die.getAddressRanges();
49   DWARFAddressRangesVector Ranges;
50   if (RangesOrError)
51     Ranges = RangesOrError.get();
52   else
53     llvm::consumeError(RangesOrError.takeError());
54   if (Ranges.size())
55     return Ranges[0].LowPC;
56   return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0);
57 }
58 
59 /// Collect debug info quality metrics for one DIE.
collectStatsForDie(DWARFDie Die,std::string FnPrefix,std::string VarPrefix,uint64_t ScopeLowPC,uint64_t BytesInScope,uint32_t InlineDepth,StringMap<PerFunctionStats> & FnStatMap,GlobalStats & GlobalStats)60 static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
61                                std::string VarPrefix, uint64_t ScopeLowPC,
62                                uint64_t BytesInScope,
63                                uint32_t InlineDepth,
64                                StringMap<PerFunctionStats> &FnStatMap,
65                                GlobalStats &GlobalStats) {
66   bool HasLoc = false;
67   uint64_t BytesCovered = 0;
68   uint64_t OffsetToFirstDefinition = 0;
69 
70   if (Die.getTag() == dwarf::DW_TAG_call_site) {
71     GlobalStats.CallSiteEntries++;
72     return;
73   }
74 
75   if (Die.getTag() != dwarf::DW_TAG_formal_parameter &&
76       Die.getTag() != dwarf::DW_TAG_variable &&
77       Die.getTag() != dwarf::DW_TAG_member) {
78     // Not a variable or constant member.
79     return;
80   }
81 
82   if (Die.find(dwarf::DW_AT_const_value)) {
83     // This catches constant members *and* variables.
84     HasLoc = true;
85     BytesCovered = BytesInScope;
86   } else {
87     if (Die.getTag() == dwarf::DW_TAG_member) {
88       // Non-const member.
89       return;
90     }
91     // Handle variables and function arguments.
92     auto FormValue = Die.find(dwarf::DW_AT_location);
93     HasLoc = FormValue.hasValue();
94     if (HasLoc) {
95       // Get PC coverage.
96       if (auto DebugLocOffset = FormValue->getAsSectionOffset()) {
97         auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc();
98         if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) {
99           for (auto Entry : List->Entries)
100             BytesCovered += Entry.End - Entry.Begin;
101           if (List->Entries.size()) {
102             uint64_t FirstDef = List->Entries[0].Begin;
103             uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE());
104             // Ranges sometimes start before the lexical scope.
105             if (UnitOfs + FirstDef >= ScopeLowPC)
106               OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC;
107             // Or even after it. Count that as a failure.
108             if (OffsetToFirstDefinition > BytesInScope)
109               OffsetToFirstDefinition = 0;
110           }
111         }
112         assert(BytesInScope);
113       } else {
114         // Assume the entire range is covered by a single location.
115         BytesCovered = BytesInScope;
116       }
117     }
118   }
119 
120   // Collect PC range coverage data.
121   auto &FnStats = FnStatMap[FnPrefix];
122   if (DWARFDie D =
123           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin))
124     Die = D;
125   // By using the variable name + the path through the lexical block tree, the
126   // keys are consistent across duplicate abstract origins in different CUs.
127   std::string VarName = StringRef(Die.getName(DINameKind::ShortName));
128   FnStats.VarsInFunction.insert(VarPrefix+VarName);
129   if (BytesInScope) {
130     FnStats.TotalVarWithLoc += (unsigned)HasLoc;
131     // Adjust for the fact the variables often start their lifetime in the
132     // middle of the scope.
133     BytesInScope -= OffsetToFirstDefinition;
134     // Turns out we have a lot of ranges that extend past the lexical scope.
135     GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered);
136     GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope;
137     assert(GlobalStats.ScopeBytesCovered <=
138            GlobalStats.ScopeBytesFromFirstDefinition);
139   } else {
140     FnStats.ConstantMembers++;
141   }
142 }
143 
144 /// Recursively collect debug info quality metrics.
collectStatsRecursive(DWARFDie Die,std::string FnPrefix,std::string VarPrefix,uint64_t ScopeLowPC,uint64_t BytesInScope,uint32_t InlineDepth,StringMap<PerFunctionStats> & FnStatMap,GlobalStats & GlobalStats)145 static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
146                                   std::string VarPrefix, uint64_t ScopeLowPC,
147                                   uint64_t BytesInScope,
148                                   uint32_t InlineDepth,
149                                   StringMap<PerFunctionStats> &FnStatMap,
150                                   GlobalStats &GlobalStats) {
151   // Handle any kind of lexical scope.
152   const dwarf::Tag Tag = Die.getTag();
153   const bool IsFunction = Tag == dwarf::DW_TAG_subprogram;
154   const bool IsBlock = Tag == dwarf::DW_TAG_lexical_block;
155   const bool IsInlinedFunction = Tag == dwarf::DW_TAG_inlined_subroutine;
156   if (IsFunction || IsInlinedFunction || IsBlock) {
157 
158     // Reset VarPrefix when entering a new function.
159     if (Die.getTag() == dwarf::DW_TAG_subprogram ||
160         Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
161       VarPrefix = "v";
162 
163     // Ignore forward declarations.
164     if (Die.find(dwarf::DW_AT_declaration))
165       return;
166 
167     // Count the function.
168     if (!IsBlock) {
169       StringRef Name = Die.getName(DINameKind::LinkageName);
170       if (Name.empty())
171         Name = Die.getName(DINameKind::ShortName);
172       FnPrefix = Name;
173       // Skip over abstract origins.
174       if (Die.find(dwarf::DW_AT_inline))
175         return;
176       // We've seen an (inlined) instance of this function.
177       auto &FnStats = FnStatMap[Name];
178       FnStats.NumFnInlined++;
179       FnStats.IsFunction = true;
180     }
181 
182     // PC Ranges.
183     auto RangesOrError = Die.getAddressRanges();
184     if (!RangesOrError) {
185       llvm::consumeError(RangesOrError.takeError());
186       return;
187     }
188 
189     auto Ranges = RangesOrError.get();
190     uint64_t BytesInThisScope = 0;
191     for (auto Range : Ranges)
192       BytesInThisScope += Range.HighPC - Range.LowPC;
193     ScopeLowPC = getLowPC(Die);
194 
195     if (BytesInThisScope) {
196       BytesInScope = BytesInThisScope;
197       if (IsFunction)
198         GlobalStats.FunctionSize += BytesInThisScope;
199       else if (IsInlinedFunction && InlineDepth == 0)
200         GlobalStats.InlineFunctionSize += BytesInThisScope;
201     }
202   } else {
203     // Not a scope, visit the Die itself. It could be a variable.
204     collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope,
205                        InlineDepth, FnStatMap, GlobalStats);
206   }
207 
208   // Set InlineDepth correctly for child recursion
209   if (IsFunction)
210     InlineDepth = 0;
211   else if (IsInlinedFunction)
212     ++InlineDepth;
213 
214   // Traverse children.
215   unsigned LexicalBlockIndex = 0;
216   DWARFDie Child = Die.getFirstChild();
217   while (Child) {
218     std::string ChildVarPrefix = VarPrefix;
219     if (Child.getTag() == dwarf::DW_TAG_lexical_block)
220       ChildVarPrefix += toHex(LexicalBlockIndex++) + '.';
221 
222     collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC,
223                           BytesInScope, InlineDepth, FnStatMap, GlobalStats);
224     Child = Child.getSibling();
225   }
226 }
227 
228 /// Print machine-readable output.
229 /// The machine-readable format is single-line JSON output.
230 /// \{
printDatum(raw_ostream & OS,const char * Key,StringRef Value)231 static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) {
232   OS << ",\"" << Key << "\":\"" << Value << '"';
233   LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
234 }
printDatum(raw_ostream & OS,const char * Key,uint64_t Value)235 static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) {
236   OS << ",\"" << Key << "\":" << Value;
237   LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
238 }
239 /// \}
240 
241 /// Collect debug info quality metrics for an entire DIContext.
242 ///
243 /// Do the impossible and reduce the quality of the debug info down to a few
244 /// numbers. The idea is to condense the data into numbers that can be tracked
245 /// over time to identify trends in newer compiler versions and gauge the effect
246 /// of particular optimizations. The raw numbers themselves are not particularly
247 /// useful, only the delta between compiling the same program with different
248 /// compilers is.
collectStatsForObjectFile(ObjectFile & Obj,DWARFContext & DICtx,Twine Filename,raw_ostream & OS)249 bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
250                                Twine Filename, raw_ostream &OS) {
251   StringRef FormatName = Obj.getFileFormatName();
252   GlobalStats GlobalStats;
253   StringMap<PerFunctionStats> Statistics;
254   for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
255     if (DWARFDie CUDie = CU->getUnitDIE(false))
256       collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats);
257 
258   /// The version number should be increased every time the algorithm is changed
259   /// (including bug fixes). New metrics may be added without increasing the
260   /// version.
261   unsigned Version = 1;
262   unsigned VarTotal = 0;
263   unsigned VarUnique = 0;
264   unsigned VarWithLoc = 0;
265   unsigned NumFunctions = 0;
266   unsigned NumInlinedFunctions = 0;
267   for (auto &Entry : Statistics) {
268     PerFunctionStats &Stats = Entry.getValue();
269     unsigned TotalVars = Stats.VarsInFunction.size() * Stats.NumFnInlined;
270     unsigned Constants = Stats.ConstantMembers;
271     VarWithLoc += Stats.TotalVarWithLoc + Constants;
272     VarTotal += TotalVars + Constants;
273     VarUnique += Stats.VarsInFunction.size();
274     LLVM_DEBUG(for (auto &V : Stats.VarsInFunction) llvm::dbgs()
275                << Entry.getKey() << ": " << V.getKey() << "\n");
276     NumFunctions += Stats.IsFunction;
277     NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined;
278   }
279 
280   // Print summary.
281   OS.SetBufferSize(1024);
282   OS << "{\"version\":" << Version;
283   LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
284              llvm::dbgs() << "---------------------------------\n");
285   printDatum(OS, "file", Filename.str());
286   printDatum(OS, "format", FormatName);
287   printDatum(OS, "source functions", NumFunctions);
288   printDatum(OS, "inlined functions", NumInlinedFunctions);
289   printDatum(OS, "unique source variables", VarUnique);
290   printDatum(OS, "source variables", VarTotal);
291   printDatum(OS, "variables with location", VarWithLoc);
292   printDatum(OS, "call site entries", GlobalStats.CallSiteEntries);
293   printDatum(OS, "scope bytes total",
294              GlobalStats.ScopeBytesFromFirstDefinition);
295   printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
296   printDatum(OS, "total function size", GlobalStats.FunctionSize);
297   printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize);
298   OS << "}\n";
299   LLVM_DEBUG(
300       llvm::dbgs() << "Total Availability: "
301                    << (int)std::round((VarWithLoc * 100.0) / VarTotal) << "%\n";
302       llvm::dbgs() << "PC Ranges covered: "
303                    << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
304                                       GlobalStats.ScopeBytesFromFirstDefinition)
305                    << "%\n");
306   return true;
307 }
308