1 //===-- Statistics.cpp - Debug Info quality metrics -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm-dwarfdump.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/StringSet.h"
12 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
13 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
14 #include "llvm/Object/ObjectFile.h"
15 #include "llvm/Support/JSON.h"
16 
17 #define DEBUG_TYPE "dwarfdump"
18 using namespace llvm;
19 using namespace llvm::dwarfdump;
20 using namespace llvm::object;
21 
22 namespace {
/// Number of buckets used when classifying variables by debug location
/// coverage. The first bucket counts variables with 0% location coverage and
/// the last bucket counts variables with 100% location coverage; the buckets
/// in between hold the partially covered ranges.
constexpr int NumOfCoverageCategories{12};

/// Index of the bucket that counts variables with zero location coverage.
constexpr unsigned ZeroCoverageBucket{0};
31 
32 /// This represents variables DIE offsets.
33 using AbstractOriginVarsTy = llvm::SmallVector<uint64_t>;
34 /// This maps function DIE offset to its variables.
35 using AbstractOriginVarsTyMap = llvm::DenseMap<uint64_t, AbstractOriginVarsTy>;
36 /// This represents function DIE offsets containing an abstract_origin.
37 using FunctionsWithAbstractOriginTy = llvm::SmallVector<uint64_t>;
38 
39 /// Holds statistics for one function (or other entity that has a PC range and
40 /// contains variables, such as a compile unit).
41 struct PerFunctionStats {
42   /// Number of inlined instances of this function.
43   unsigned NumFnInlined = 0;
44   /// Number of out-of-line instances of this function.
45   unsigned NumFnOutOfLine = 0;
46   /// Number of inlined instances that have abstract origins.
47   unsigned NumAbstractOrigins = 0;
48   /// Number of variables and parameters with location across all inlined
49   /// instances.
50   unsigned TotalVarWithLoc = 0;
51   /// Number of constants with location across all inlined instances.
52   unsigned ConstantMembers = 0;
53   /// Number of arificial variables, parameters or members across all instances.
54   unsigned NumArtificial = 0;
55   /// List of all Variables and parameters in this function.
56   StringSet<> VarsInFunction;
57   /// Compile units also cover a PC range, but have this flag set to false.
58   bool IsFunction = false;
59   /// Function has source location information.
60   bool HasSourceLocation = false;
61   /// Number of function parameters.
62   unsigned NumParams = 0;
63   /// Number of function parameters with source location.
64   unsigned NumParamSourceLocations = 0;
65   /// Number of function parameters with type.
66   unsigned NumParamTypes = 0;
67   /// Number of function parameters with a DW_AT_location.
68   unsigned NumParamLocations = 0;
69   /// Number of local variables.
70   unsigned NumLocalVars = 0;
71   /// Number of local variables with source location.
72   unsigned NumLocalVarSourceLocations = 0;
73   /// Number of local variables with type.
74   unsigned NumLocalVarTypes = 0;
75   /// Number of local variables with DW_AT_location.
76   unsigned NumLocalVarLocations = 0;
77 };
78 
/// Totals accumulated over every DIE that is visited.
///
/// NOTE(review): the per-DIE amounts added to the byte totals are uint64_t
/// while the totals themselves are `unsigned`; confirm they cannot wrap for
/// the inputs of interest before relying on them for very large binaries.
struct GlobalStats {
  /// Sum of the PC range bytes covered by DW_AT_locations.
  unsigned TotalBytesCovered{0};
  /// Sum of the parent DIE PC range bytes covered by DW_AT_locations.
  unsigned ScopeBytesCovered{0};
  /// Sum of the PC range bytes in each variable's enclosing scope.
  unsigned ScopeBytes{0};
  /// Sum of the PC range bytes covered by DW_AT_locations that use debug
  /// entry values (DW_OP_entry_value).
  unsigned ScopeEntryValueBytesCovered{0};
  /// Sum of the PC range bytes covered by DW_AT_locations of formal
  /// parameters.
  unsigned ParamScopeBytesCovered{0};
  /// Sum of the PC range bytes in each parameter's enclosing scope.
  unsigned ParamScopeBytes{0};
  /// Sum of the PC range bytes covered by DW_AT_locations that use debug
  /// entry values (DW_OP_entry_value), parameters only.
  unsigned ParamScopeEntryValueBytesCovered{0};
  /// Sum of the PC range bytes covered by DW_AT_locations, local variables
  /// only.
  unsigned LocalVarScopeBytesCovered{0};
  /// Sum of the PC range bytes in each local variable's enclosing scope.
  unsigned LocalVarScopeBytes{0};
  /// Sum of the PC range bytes covered by DW_AT_locations that use debug
  /// entry values (DW_OP_entry_value), local variables only.
  unsigned LocalVarScopeEntryValueBytesCovered{0};
  /// Number of call site entries (DW_AT_call_file & DW_AT_call_line).
  unsigned CallSiteEntries{0};
  /// Number of call site DIEs (DW_TAG_call_site).
  unsigned CallSiteDIEs{0};
  /// Number of call site parameter DIEs (DW_TAG_call_site_parameter).
  unsigned CallSiteParamDIEs{0};
  /// Byte size of all concrete functions, including the inline functions
  /// contained in them.
  unsigned FunctionSize{0};
  /// Byte size of the top-level inlined functions within concrete functions.
  /// This can help tune the inline settings when compiling to match user
  /// expectations.
  unsigned InlineFunctionSize{0};
};
120 
121 /// Holds accumulated debug location statistics about local variables and
122 /// formal parameters.
123 struct LocationStats {
124   /// Map the scope coverage decile to the number of variables in the decile.
125   /// The first element of the array (at the index zero) represents the number
126   /// of variables with the no debug location at all, but the last element
127   /// in the vector represents the number of fully covered variables within
128   /// its scope.
129   std::vector<unsigned> VarParamLocStats{
130       std::vector<unsigned>(NumOfCoverageCategories, 0)};
131   /// Map non debug entry values coverage.
132   std::vector<unsigned> VarParamNonEntryValLocStats{
133       std::vector<unsigned>(NumOfCoverageCategories, 0)};
134   /// The debug location statistics for formal parameters.
135   std::vector<unsigned> ParamLocStats{
136       std::vector<unsigned>(NumOfCoverageCategories, 0)};
137   /// Map non debug entry values coverage for formal parameters.
138   std::vector<unsigned> ParamNonEntryValLocStats{
139       std::vector<unsigned>(NumOfCoverageCategories, 0)};
140   /// The debug location statistics for local variables.
141   std::vector<unsigned> LocalVarLocStats{
142       std::vector<unsigned>(NumOfCoverageCategories, 0)};
143   /// Map non debug entry values coverage for local variables.
144   std::vector<unsigned> LocalVarNonEntryValLocStats{
145       std::vector<unsigned>(NumOfCoverageCategories, 0)};
146   /// Total number of local variables and function parameters processed.
147   unsigned NumVarParam = 0;
148   /// Total number of formal parameters processed.
149   unsigned NumParam = 0;
150   /// Total number of local variables processed.
151   unsigned NumVar = 0;
152 };
153 } // namespace
154 
155 /// Collect debug location statistics for one DIE.
156 static void collectLocStats(uint64_t ScopeBytesCovered, uint64_t BytesInScope,
157                             std::vector<unsigned> &VarParamLocStats,
158                             std::vector<unsigned> &ParamLocStats,
159                             std::vector<unsigned> &LocalVarLocStats,
160                             bool IsParam, bool IsLocalVar) {
161   auto getCoverageBucket = [ScopeBytesCovered, BytesInScope]() -> unsigned {
162     // No debug location at all for the variable.
163     if (ScopeBytesCovered == 0)
164       return 0;
165     // Fully covered variable within its scope.
166     if (ScopeBytesCovered >= BytesInScope)
167       return NumOfCoverageCategories - 1;
168     // Get covered range (e.g. 20%-29%).
169     unsigned LocBucket = 100 * (double)ScopeBytesCovered / BytesInScope;
170     LocBucket /= 10;
171     return LocBucket + 1;
172   };
173 
174   unsigned CoverageBucket = getCoverageBucket();
175 
176   VarParamLocStats[CoverageBucket]++;
177   if (IsParam)
178     ParamLocStats[CoverageBucket]++;
179   else if (IsLocalVar)
180     LocalVarLocStats[CoverageBucket]++;
181 }
182 
183 /// Construct an identifier for a given DIE from its Prefix, Name, DeclFileName
184 /// and DeclLine. The identifier aims to be unique for any unique entities,
185 /// but keeping the same among different instances of the same entity.
186 static std::string constructDieID(DWARFDie Die,
187                                   StringRef Prefix = StringRef()) {
188   std::string IDStr;
189   llvm::raw_string_ostream ID(IDStr);
190   ID << Prefix
191      << Die.getName(DINameKind::LinkageName);
192 
193   // Prefix + Name is enough for local variables and parameters.
194   if (!Prefix.empty() && !Prefix.equals("g"))
195     return ID.str();
196 
197   auto DeclFile = Die.findRecursively(dwarf::DW_AT_decl_file);
198   std::string File;
199   if (DeclFile) {
200     DWARFUnit *U = Die.getDwarfUnit();
201     if (const auto *LT = U->getContext().getLineTableForUnit(U))
202       if (LT->getFileNameByIndex(
203               dwarf::toUnsigned(DeclFile, 0), U->getCompilationDir(),
204               DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
205         File = std::string(sys::path::filename(File));
206   }
207   ID << ":" << (File.empty() ? "/" : File);
208   ID << ":"
209      << dwarf::toUnsigned(Die.findRecursively(dwarf::DW_AT_decl_line), 0);
210   return ID.str();
211 }
212 
213 /// Return the number of bytes in the overlap of ranges A and B.
214 static uint64_t calculateOverlap(DWARFAddressRange A, DWARFAddressRange B) {
215   uint64_t Lower = std::max(A.LowPC, B.LowPC);
216   uint64_t Upper = std::min(A.HighPC, B.HighPC);
217   if (Lower >= Upper)
218     return 0;
219   return Upper - Lower;
220 }
221 
222 /// Collect debug info quality metrics for one DIE.
223 static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix,
224                                const std::string &VarPrefix,
225                                uint64_t BytesInScope, uint32_t InlineDepth,
226                                StringMap<PerFunctionStats> &FnStatMap,
227                                GlobalStats &GlobalStats,
228                                LocationStats &LocStats,
229                                AbstractOriginVarsTy *AbstractOriginVariables) {
230   const dwarf::Tag Tag = Die.getTag();
231   // Skip CU node.
232   if (Tag == dwarf::DW_TAG_compile_unit)
233     return;
234 
235   bool HasLoc = false;
236   bool HasSrcLoc = false;
237   bool HasType = false;
238   uint64_t TotalBytesCovered = 0;
239   uint64_t ScopeBytesCovered = 0;
240   uint64_t BytesEntryValuesCovered = 0;
241   auto &FnStats = FnStatMap[FnPrefix];
242   bool IsParam = Tag == dwarf::DW_TAG_formal_parameter;
243   bool IsLocalVar = Tag == dwarf::DW_TAG_variable;
244   bool IsConstantMember = Tag == dwarf::DW_TAG_member &&
245                           Die.find(dwarf::DW_AT_const_value);
246 
247   // For zero covered inlined variables the locstats will be
248   // calculated later.
249   bool DeferLocStats = false;
250 
251   if (Tag == dwarf::DW_TAG_call_site || Tag == dwarf::DW_TAG_GNU_call_site) {
252     GlobalStats.CallSiteDIEs++;
253     return;
254   }
255 
256   if (Tag == dwarf::DW_TAG_call_site_parameter ||
257       Tag == dwarf::DW_TAG_GNU_call_site_parameter) {
258     GlobalStats.CallSiteParamDIEs++;
259     return;
260   }
261 
262   if (!IsParam && !IsLocalVar && !IsConstantMember) {
263     // Not a variable or constant member.
264     return;
265   }
266 
267   // Ignore declarations of global variables.
268   if (IsLocalVar && Die.find(dwarf::DW_AT_declaration))
269     return;
270 
271   if (Die.findRecursively(dwarf::DW_AT_decl_file) &&
272       Die.findRecursively(dwarf::DW_AT_decl_line))
273     HasSrcLoc = true;
274 
275   if (Die.findRecursively(dwarf::DW_AT_type))
276     HasType = true;
277 
278   if (Die.find(dwarf::DW_AT_abstract_origin)) {
279     if (Die.find(dwarf::DW_AT_location) || Die.find(dwarf::DW_AT_const_value)) {
280       if (AbstractOriginVariables) {
281         auto Offset = Die.find(dwarf::DW_AT_abstract_origin);
282         // Do not track this variable any more, since it has location
283         // coverage.
284         llvm::erase_value(*AbstractOriginVariables, (*Offset).getRawUValue());
285       }
286     } else {
287       // The locstats will be handled at the end of
288       // the collectStatsRecursive().
289       DeferLocStats = true;
290     }
291   }
292 
293   auto IsEntryValue = [&](ArrayRef<uint8_t> D) -> bool {
294     DWARFUnit *U = Die.getDwarfUnit();
295     DataExtractor Data(toStringRef(D),
296                        Die.getDwarfUnit()->getContext().isLittleEndian(), 0);
297     DWARFExpression Expression(Data, U->getAddressByteSize(),
298                                U->getFormParams().Format);
299     // Consider the expression containing the DW_OP_entry_value as
300     // an entry value.
301     return llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
302       return Op.getCode() == dwarf::DW_OP_entry_value ||
303              Op.getCode() == dwarf::DW_OP_GNU_entry_value;
304     });
305   };
306 
307   if (Die.find(dwarf::DW_AT_const_value)) {
308     // This catches constant members *and* variables.
309     HasLoc = true;
310     ScopeBytesCovered = BytesInScope;
311     TotalBytesCovered = BytesInScope;
312   } else {
313     // Handle variables and function arguments.
314     Expected<std::vector<DWARFLocationExpression>> Loc =
315         Die.getLocations(dwarf::DW_AT_location);
316     if (!Loc) {
317       consumeError(Loc.takeError());
318     } else {
319       HasLoc = true;
320       // Get PC coverage.
321       auto Default = find_if(
322           *Loc, [](const DWARFLocationExpression &L) { return !L.Range; });
323       if (Default != Loc->end()) {
324         // Assume the entire range is covered by a single location.
325         ScopeBytesCovered = BytesInScope;
326         TotalBytesCovered = BytesInScope;
327       } else {
328         // Caller checks this Expected result already, it cannot fail.
329         auto ScopeRanges = cantFail(Die.getParent().getAddressRanges());
330         for (auto Entry : *Loc) {
331           TotalBytesCovered += Entry.Range->HighPC - Entry.Range->LowPC;
332           uint64_t ScopeBytesCoveredByEntry = 0;
333           // Calculate how many bytes of the parent scope this entry covers.
334           // FIXME: In section 2.6.2 of the DWARFv5 spec it says that "The
335           // address ranges defined by the bounded location descriptions of a
336           // location list may overlap". So in theory a variable can have
337           // multiple simultaneous locations, which would make this calculation
338           // misleading because we will count the overlapped areas
339           // twice. However, clang does not currently emit DWARF like this.
340           for (DWARFAddressRange R : ScopeRanges) {
341             ScopeBytesCoveredByEntry += calculateOverlap(*Entry.Range, R);
342           }
343           ScopeBytesCovered += ScopeBytesCoveredByEntry;
344           if (IsEntryValue(Entry.Expr))
345             BytesEntryValuesCovered += ScopeBytesCoveredByEntry;
346         }
347       }
348     }
349   }
350 
351   // Calculate the debug location statistics.
352   if (BytesInScope && !DeferLocStats) {
353     LocStats.NumVarParam++;
354     if (IsParam)
355       LocStats.NumParam++;
356     else if (IsLocalVar)
357       LocStats.NumVar++;
358 
359     collectLocStats(ScopeBytesCovered, BytesInScope, LocStats.VarParamLocStats,
360                     LocStats.ParamLocStats, LocStats.LocalVarLocStats, IsParam,
361                     IsLocalVar);
362     // Non debug entry values coverage statistics.
363     collectLocStats(ScopeBytesCovered - BytesEntryValuesCovered, BytesInScope,
364                     LocStats.VarParamNonEntryValLocStats,
365                     LocStats.ParamNonEntryValLocStats,
366                     LocStats.LocalVarNonEntryValLocStats, IsParam, IsLocalVar);
367   }
368 
369   // Collect PC range coverage data.
370   if (DWARFDie D =
371           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin))
372     Die = D;
373 
374   std::string VarID = constructDieID(Die, VarPrefix);
375   FnStats.VarsInFunction.insert(VarID);
376 
377   GlobalStats.TotalBytesCovered += TotalBytesCovered;
378   if (BytesInScope) {
379     GlobalStats.ScopeBytesCovered += ScopeBytesCovered;
380     GlobalStats.ScopeBytes += BytesInScope;
381     GlobalStats.ScopeEntryValueBytesCovered += BytesEntryValuesCovered;
382     if (IsParam) {
383       GlobalStats.ParamScopeBytesCovered += ScopeBytesCovered;
384       GlobalStats.ParamScopeBytes += BytesInScope;
385       GlobalStats.ParamScopeEntryValueBytesCovered += BytesEntryValuesCovered;
386     } else if (IsLocalVar) {
387       GlobalStats.LocalVarScopeBytesCovered += ScopeBytesCovered;
388       GlobalStats.LocalVarScopeBytes += BytesInScope;
389       GlobalStats.LocalVarScopeEntryValueBytesCovered +=
390           BytesEntryValuesCovered;
391     }
392     assert(GlobalStats.ScopeBytesCovered <= GlobalStats.ScopeBytes);
393   }
394 
395   if (IsConstantMember) {
396     FnStats.ConstantMembers++;
397     return;
398   }
399 
400   FnStats.TotalVarWithLoc += (unsigned)HasLoc;
401 
402   if (Die.find(dwarf::DW_AT_artificial)) {
403     FnStats.NumArtificial++;
404     return;
405   }
406 
407   if (IsParam) {
408     FnStats.NumParams++;
409     if (HasType)
410       FnStats.NumParamTypes++;
411     if (HasSrcLoc)
412       FnStats.NumParamSourceLocations++;
413     if (HasLoc)
414       FnStats.NumParamLocations++;
415   } else if (IsLocalVar) {
416     FnStats.NumLocalVars++;
417     if (HasType)
418       FnStats.NumLocalVarTypes++;
419     if (HasSrcLoc)
420       FnStats.NumLocalVarSourceLocations++;
421     if (HasLoc)
422       FnStats.NumLocalVarLocations++;
423   }
424 }
425 
426 /// Recursively collect variables from subprogram with DW_AT_inline attribute.
427 static void collectAbstractOriginFnInfo(
428     DWARFDie Die, uint64_t SPOffset,
429     AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo) {
430   DWARFDie Child = Die.getFirstChild();
431   while (Child) {
432     const dwarf::Tag ChildTag = Child.getTag();
433     if (ChildTag == dwarf::DW_TAG_formal_parameter ||
434         ChildTag == dwarf::DW_TAG_variable)
435       GlobalAbstractOriginFnInfo[SPOffset].push_back(Child.getOffset());
436     else if (ChildTag == dwarf::DW_TAG_lexical_block)
437       collectAbstractOriginFnInfo(Child, SPOffset, GlobalAbstractOriginFnInfo);
438     Child = Child.getSibling();
439   }
440 }
441 
442 /// Recursively collect debug info quality metrics.
443 static void collectStatsRecursive(
444     DWARFDie Die, std::string FnPrefix, std::string VarPrefix,
445     uint64_t BytesInScope, uint32_t InlineDepth,
446     StringMap<PerFunctionStats> &FnStatMap, GlobalStats &GlobalStats,
447     LocationStats &LocStats,
448     AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo,
449     FunctionsWithAbstractOriginTy &FnsWithAbstractOriginToBeProcessed,
450     AbstractOriginVarsTy *AbstractOriginVarsPtr = nullptr) {
451   // Skip NULL nodes.
452   if (Die.isNULL())
453     return;
454 
455   const dwarf::Tag Tag = Die.getTag();
456   // Skip function types.
457   if (Tag == dwarf::DW_TAG_subroutine_type)
458     return;
459 
460   // Handle any kind of lexical scope.
461   const bool HasAbstractOrigin = Die.find(dwarf::DW_AT_abstract_origin) != None;
462   const bool IsFunction = Tag == dwarf::DW_TAG_subprogram;
463   const bool IsBlock = Tag == dwarf::DW_TAG_lexical_block;
464   const bool IsInlinedFunction = Tag == dwarf::DW_TAG_inlined_subroutine;
465   // We want to know how many variables (with abstract_origin) don't have
466   // location info.
467   const bool IsCandidateForZeroLocCovTracking =
468       (IsInlinedFunction || (IsFunction && HasAbstractOrigin));
469 
470   AbstractOriginVarsTy AbstractOriginVars;
471 
472   // Get the vars of the inlined fn, so the locstats
473   // reports the missing vars (with coverage 0%).
474   if (IsCandidateForZeroLocCovTracking) {
475     auto OffsetFn = Die.find(dwarf::DW_AT_abstract_origin);
476     if (OffsetFn) {
477       uint64_t OffsetOfInlineFnCopy = (*OffsetFn).getRawUValue();
478       if (GlobalAbstractOriginFnInfo.count(OffsetOfInlineFnCopy)) {
479         AbstractOriginVars = GlobalAbstractOriginFnInfo[OffsetOfInlineFnCopy];
480         AbstractOriginVarsPtr = &AbstractOriginVars;
481       } else {
482         // This means that the DW_AT_inline fn copy is out of order,
483         // so this abstract origin instance will be processed later.
484         FnsWithAbstractOriginToBeProcessed.push_back(Die.getOffset());
485         AbstractOriginVarsPtr = nullptr;
486       }
487     }
488   }
489 
490   if (IsFunction || IsInlinedFunction || IsBlock) {
491     // Reset VarPrefix when entering a new function.
492     if (IsFunction || IsInlinedFunction)
493       VarPrefix = "v";
494 
495     // Ignore forward declarations.
496     if (Die.find(dwarf::DW_AT_declaration))
497       return;
498 
499     // Check for call sites.
500     if (Die.find(dwarf::DW_AT_call_file) && Die.find(dwarf::DW_AT_call_line))
501       GlobalStats.CallSiteEntries++;
502 
503     // PC Ranges.
504     auto RangesOrError = Die.getAddressRanges();
505     if (!RangesOrError) {
506       llvm::consumeError(RangesOrError.takeError());
507       return;
508     }
509 
510     auto Ranges = RangesOrError.get();
511     uint64_t BytesInThisScope = 0;
512     for (auto Range : Ranges)
513       BytesInThisScope += Range.HighPC - Range.LowPC;
514 
515     // Count the function.
516     if (!IsBlock) {
517       // Skip over abstract origins, but collect variables
518       // from it so it can be used for location statistics
519       // for inlined instancies.
520       if (Die.find(dwarf::DW_AT_inline)) {
521         uint64_t SPOffset = Die.getOffset();
522         collectAbstractOriginFnInfo(Die, SPOffset, GlobalAbstractOriginFnInfo);
523         return;
524       }
525 
526       std::string FnID = constructDieID(Die);
527       // We've seen an instance of this function.
528       auto &FnStats = FnStatMap[FnID];
529       FnStats.IsFunction = true;
530       if (IsInlinedFunction) {
531         FnStats.NumFnInlined++;
532         if (Die.findRecursively(dwarf::DW_AT_abstract_origin))
533           FnStats.NumAbstractOrigins++;
534       } else {
535         FnStats.NumFnOutOfLine++;
536       }
537       if (Die.findRecursively(dwarf::DW_AT_decl_file) &&
538           Die.findRecursively(dwarf::DW_AT_decl_line))
539         FnStats.HasSourceLocation = true;
540       // Update function prefix.
541       FnPrefix = FnID;
542     }
543 
544     if (BytesInThisScope) {
545       BytesInScope = BytesInThisScope;
546       if (IsFunction)
547         GlobalStats.FunctionSize += BytesInThisScope;
548       else if (IsInlinedFunction && InlineDepth == 0)
549         GlobalStats.InlineFunctionSize += BytesInThisScope;
550     }
551   } else {
552     // Not a scope, visit the Die itself. It could be a variable.
553     collectStatsForDie(Die, FnPrefix, VarPrefix, BytesInScope, InlineDepth,
554                        FnStatMap, GlobalStats, LocStats, AbstractOriginVarsPtr);
555   }
556 
557   // Set InlineDepth correctly for child recursion
558   if (IsFunction)
559     InlineDepth = 0;
560   else if (IsInlinedFunction)
561     ++InlineDepth;
562 
563   // Traverse children.
564   unsigned LexicalBlockIndex = 0;
565   unsigned FormalParameterIndex = 0;
566   DWARFDie Child = Die.getFirstChild();
567   while (Child) {
568     std::string ChildVarPrefix = VarPrefix;
569     if (Child.getTag() == dwarf::DW_TAG_lexical_block)
570       ChildVarPrefix += toHex(LexicalBlockIndex++) + '.';
571     if (Child.getTag() == dwarf::DW_TAG_formal_parameter)
572       ChildVarPrefix += 'p' + toHex(FormalParameterIndex++) + '.';
573 
574     collectStatsRecursive(
575         Child, FnPrefix, ChildVarPrefix, BytesInScope, InlineDepth, FnStatMap,
576         GlobalStats, LocStats, GlobalAbstractOriginFnInfo,
577         FnsWithAbstractOriginToBeProcessed, AbstractOriginVarsPtr);
578     Child = Child.getSibling();
579   }
580 
581   if (!IsCandidateForZeroLocCovTracking)
582     return;
583 
584   // After we have processed all vars of the inlined function (or function with
585   // an abstract_origin), we want to know how many variables have no location.
586   for (auto Offset : AbstractOriginVars) {
587     LocStats.NumVarParam++;
588     LocStats.VarParamLocStats[ZeroCoverageBucket]++;
589     auto FnDie = Die.getDwarfUnit()->getDIEForOffset(Offset);
590     if (!FnDie)
591       continue;
592     auto Tag = FnDie.getTag();
593     if (Tag == dwarf::DW_TAG_formal_parameter) {
594       LocStats.NumParam++;
595       LocStats.ParamLocStats[ZeroCoverageBucket]++;
596     } else if (Tag == dwarf::DW_TAG_variable) {
597       LocStats.NumVar++;
598       LocStats.LocalVarLocStats[ZeroCoverageBucket]++;
599     }
600   }
601 }
602 
603 /// Print human-readable output.
604 /// \{
605 static void printDatum(json::OStream &J, const char *Key, json::Value Value) {
606   J.attribute(Key, Value);
607   LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
608 }
609 
610 static void printLocationStats(json::OStream &J, const char *Key,
611                                std::vector<unsigned> &LocationStats) {
612   J.attribute(
613       (Twine(Key) + " with 0% of parent scope covered by DW_AT_location").str(),
614       LocationStats[0]);
615   LLVM_DEBUG(
616       llvm::dbgs() << Key
617                    << " with 0% of parent scope covered by DW_AT_location: \\"
618                    << LocationStats[0] << '\n');
619   J.attribute(
620       (Twine(Key) + " with (0%,10%) of parent scope covered by DW_AT_location")
621           .str(),
622       LocationStats[1]);
623   LLVM_DEBUG(llvm::dbgs()
624              << Key
625              << " with (0%,10%) of parent scope covered by DW_AT_location: "
626              << LocationStats[1] << '\n');
627   for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) {
628     J.attribute((Twine(Key) + " with [" + Twine((i - 1) * 10) + "%," +
629                  Twine(i * 10) + "%) of parent scope covered by DW_AT_location")
630                     .str(),
631                 LocationStats[i]);
632     LLVM_DEBUG(llvm::dbgs()
633                << Key << " with [" << (i - 1) * 10 << "%," << i * 10
634                << "%) of parent scope covered by DW_AT_location: "
635                << LocationStats[i]);
636   }
637   J.attribute(
638       (Twine(Key) + " with 100% of parent scope covered by DW_AT_location")
639           .str(),
640       LocationStats[NumOfCoverageCategories - 1]);
641   LLVM_DEBUG(
642       llvm::dbgs() << Key
643                    << " with 100% of parent scope covered by DW_AT_location: "
644                    << LocationStats[NumOfCoverageCategories - 1]);
645 }
646 
647 static void printSectionSizes(json::OStream &J, const SectionSizes &Sizes) {
648   for (const auto &It : Sizes.DebugSectionSizes)
649     J.attribute((Twine("#bytes in ") + It.first).str(), int64_t(It.second));
650 }
651 
652 /// Stop tracking variables that contain abstract_origin with a location.
653 /// This is used for out-of-order DW_AT_inline subprograms only.
654 static void updateVarsWithAbstractOriginLocCovInfo(
655     DWARFDie FnDieWithAbstractOrigin,
656     AbstractOriginVarsTy &AbstractOriginVars) {
657   DWARFDie Child = FnDieWithAbstractOrigin.getFirstChild();
658   while (Child) {
659     const dwarf::Tag ChildTag = Child.getTag();
660     if ((ChildTag == dwarf::DW_TAG_formal_parameter ||
661          ChildTag == dwarf::DW_TAG_variable) &&
662         (Child.find(dwarf::DW_AT_location) ||
663          Child.find(dwarf::DW_AT_const_value))) {
664       auto OffsetVar = Child.find(dwarf::DW_AT_abstract_origin);
665       if (OffsetVar)
666         llvm::erase_value(AbstractOriginVars, (*OffsetVar).getRawUValue());
667     } else if (ChildTag == dwarf::DW_TAG_lexical_block)
668       updateVarsWithAbstractOriginLocCovInfo(Child, AbstractOriginVars);
669     Child = Child.getSibling();
670   }
671 }
672 
673 /// Collect zero location coverage for inlined variables which refer to
674 /// a DW_AT_inline copy of subprogram that is out of order in the DWARF.
675 /// Also cover the variables of a concrete function (represented with
676 /// the DW_TAG_subprogram) with an abstract_origin attribute.
677 static void collectZeroLocCovForVarsWithAbstractOrigin(
678     DWARFUnit *DwUnit, GlobalStats &GlobalStats, LocationStats &LocStats,
679     AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo,
680     FunctionsWithAbstractOriginTy &FnsWithAbstractOriginToBeProcessed) {
681   for (auto FnOffset : FnsWithAbstractOriginToBeProcessed) {
682     DWARFDie FnDieWithAbstractOrigin = DwUnit->getDIEForOffset(FnOffset);
683     auto FnCopy = FnDieWithAbstractOrigin.find(dwarf::DW_AT_abstract_origin);
684     AbstractOriginVarsTy AbstractOriginVars;
685     if (!FnCopy)
686       continue;
687 
688     AbstractOriginVars = GlobalAbstractOriginFnInfo[(*FnCopy).getRawUValue()];
689     updateVarsWithAbstractOriginLocCovInfo(FnDieWithAbstractOrigin,
690                                            AbstractOriginVars);
691 
692     for (auto Offset : AbstractOriginVars) {
693       LocStats.NumVarParam++;
694       LocStats.VarParamLocStats[ZeroCoverageBucket]++;
695       auto Tag = DwUnit->getDIEForOffset(Offset).getTag();
696       if (Tag == dwarf::DW_TAG_formal_parameter) {
697         LocStats.NumParam++;
698         LocStats.ParamLocStats[ZeroCoverageBucket]++;
699       } else if (Tag == dwarf::DW_TAG_variable) {
700         LocStats.NumVar++;
701         LocStats.LocalVarLocStats[ZeroCoverageBucket]++;
702       }
703     }
704   }
705 }
706 
707 /// \}
708 
709 /// Collect debug info quality metrics for an entire DIContext.
710 ///
711 /// Do the impossible and reduce the quality of the debug info down to a few
712 /// numbers. The idea is to condense the data into numbers that can be tracked
713 /// over time to identify trends in newer compiler versions and gauge the effect
714 /// of particular optimizations. The raw numbers themselves are not particularly
715 /// useful, only the delta between compiling the same program with different
716 /// compilers is.
717 bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
718                                           const Twine &Filename,
719                                           raw_ostream &OS) {
720   StringRef FormatName = Obj.getFileFormatName();
721   GlobalStats GlobalStats;
722   LocationStats LocStats;
723   StringMap<PerFunctionStats> Statistics;
724   for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units()) {
725     if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false)) {
726       // These variables are being reset for each CU, since there could be
727       // a situation where we have two subprogram DIEs with the same offsets
      // in two different CUs, and we can end up using wrong variables info
729       // when trying to resolve abstract_origin attribute.
730       // TODO: Handle LTO cases where the abstract origin of
731       // the function is in a different CU than the one it's
732       // referenced from or inlined into.
733       AbstractOriginVarsTyMap GlobalAbstractOriginFnInfo;
734       FunctionsWithAbstractOriginTy FnsWithAbstractOriginToBeProcessed;
735 
736       collectStatsRecursive(CUDie, "/", "g", 0, 0, Statistics, GlobalStats,
737                             LocStats, GlobalAbstractOriginFnInfo,
738                             FnsWithAbstractOriginToBeProcessed);
739 
740       collectZeroLocCovForVarsWithAbstractOrigin(
741           CUDie.getDwarfUnit(), GlobalStats, LocStats,
742           GlobalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed);
743     }
744   }
745 
746   /// Collect the sizes of debug sections.
747   SectionSizes Sizes;
748   calculateSectionSizes(Obj, Sizes, Filename);
749 
750   /// The version number should be increased every time the algorithm is changed
751   /// (including bug fixes). New metrics may be added without increasing the
752   /// version.
753   unsigned Version = 8;
754   unsigned VarParamTotal = 0;
755   unsigned VarParamUnique = 0;
756   unsigned VarParamWithLoc = 0;
757   unsigned NumFunctions = 0;
758   unsigned NumInlinedFunctions = 0;
759   unsigned NumFuncsWithSrcLoc = 0;
760   unsigned NumAbstractOrigins = 0;
761   unsigned ParamTotal = 0;
762   unsigned ParamWithType = 0;
763   unsigned ParamWithLoc = 0;
764   unsigned ParamWithSrcLoc = 0;
765   unsigned LocalVarTotal = 0;
766   unsigned LocalVarWithType = 0;
767   unsigned LocalVarWithSrcLoc = 0;
768   unsigned LocalVarWithLoc = 0;
769   for (auto &Entry : Statistics) {
770     PerFunctionStats &Stats = Entry.getValue();
771     unsigned TotalVars = Stats.VarsInFunction.size() *
772                          (Stats.NumFnInlined + Stats.NumFnOutOfLine);
773     // Count variables in global scope.
774     if (!Stats.IsFunction)
775       TotalVars =
776           Stats.NumLocalVars + Stats.ConstantMembers + Stats.NumArtificial;
777     unsigned Constants = Stats.ConstantMembers;
778     VarParamWithLoc += Stats.TotalVarWithLoc + Constants;
779     VarParamTotal += TotalVars;
780     VarParamUnique += Stats.VarsInFunction.size();
781     LLVM_DEBUG(for (auto &V
782                     : Stats.VarsInFunction) llvm::dbgs()
783                << Entry.getKey() << ": " << V.getKey() << "\n");
784     NumFunctions += Stats.IsFunction;
785     NumFuncsWithSrcLoc += Stats.HasSourceLocation;
786     NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined;
787     NumAbstractOrigins += Stats.IsFunction * Stats.NumAbstractOrigins;
788     ParamTotal += Stats.NumParams;
789     ParamWithType += Stats.NumParamTypes;
790     ParamWithLoc += Stats.NumParamLocations;
791     ParamWithSrcLoc += Stats.NumParamSourceLocations;
792     LocalVarTotal += Stats.NumLocalVars;
793     LocalVarWithType += Stats.NumLocalVarTypes;
794     LocalVarWithLoc += Stats.NumLocalVarLocations;
795     LocalVarWithSrcLoc += Stats.NumLocalVarSourceLocations;
796   }
797 
798   // Print summary.
799   OS.SetBufferSize(1024);
800   json::OStream J(OS, 2);
801   J.objectBegin();
802   J.attribute("version", Version);
803   LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
804              llvm::dbgs() << "---------------------------------\n");
805 
806   printDatum(J, "file", Filename.str());
807   printDatum(J, "format", FormatName);
808 
809   printDatum(J, "#functions", NumFunctions);
810   printDatum(J, "#functions with location", NumFuncsWithSrcLoc);
811   printDatum(J, "#inlined functions", NumInlinedFunctions);
812   printDatum(J, "#inlined functions with abstract origins", NumAbstractOrigins);
813 
814   // This includes local variables and formal parameters.
815   printDatum(J, "#unique source variables", VarParamUnique);
816   printDatum(J, "#source variables", VarParamTotal);
817   printDatum(J, "#source variables with location", VarParamWithLoc);
818 
819   printDatum(J, "#call site entries", GlobalStats.CallSiteEntries);
820   printDatum(J, "#call site DIEs", GlobalStats.CallSiteDIEs);
821   printDatum(J, "#call site parameter DIEs", GlobalStats.CallSiteParamDIEs);
822 
823   printDatum(J, "sum_all_variables(#bytes in parent scope)",
824              GlobalStats.ScopeBytes);
825   printDatum(J,
826              "sum_all_variables(#bytes in any scope covered by DW_AT_location)",
827              GlobalStats.TotalBytesCovered);
828   printDatum(J,
829              "sum_all_variables(#bytes in parent scope covered by "
830              "DW_AT_location)",
831              GlobalStats.ScopeBytesCovered);
832   printDatum(J,
833              "sum_all_variables(#bytes in parent scope covered by "
834              "DW_OP_entry_value)",
835              GlobalStats.ScopeEntryValueBytesCovered);
836 
837   printDatum(J, "sum_all_params(#bytes in parent scope)",
838              GlobalStats.ParamScopeBytes);
839   printDatum(J,
840              "sum_all_params(#bytes in parent scope covered by DW_AT_location)",
841              GlobalStats.ParamScopeBytesCovered);
842   printDatum(J,
843              "sum_all_params(#bytes in parent scope covered by "
844              "DW_OP_entry_value)",
845              GlobalStats.ParamScopeEntryValueBytesCovered);
846 
847   printDatum(J, "sum_all_local_vars(#bytes in parent scope)",
848              GlobalStats.LocalVarScopeBytes);
849   printDatum(J,
850              "sum_all_local_vars(#bytes in parent scope covered by "
851              "DW_AT_location)",
852              GlobalStats.LocalVarScopeBytesCovered);
853   printDatum(J,
854              "sum_all_local_vars(#bytes in parent scope covered by "
855              "DW_OP_entry_value)",
856              GlobalStats.LocalVarScopeEntryValueBytesCovered);
857 
858   printDatum(J, "#bytes within functions", GlobalStats.FunctionSize);
859   printDatum(J, "#bytes within inlined functions",
860              GlobalStats.InlineFunctionSize);
861 
862   // Print the summary for formal parameters.
863   printDatum(J, "#params", ParamTotal);
864   printDatum(J, "#params with source location", ParamWithSrcLoc);
865   printDatum(J, "#params with type", ParamWithType);
866   printDatum(J, "#params with binary location", ParamWithLoc);
867 
868   // Print the summary for local variables.
869   printDatum(J, "#local vars", LocalVarTotal);
870   printDatum(J, "#local vars with source location", LocalVarWithSrcLoc);
871   printDatum(J, "#local vars with type", LocalVarWithType);
872   printDatum(J, "#local vars with binary location", LocalVarWithLoc);
873 
874   // Print the debug section sizes.
875   printSectionSizes(J, Sizes);
876 
877   // Print the location statistics for variables (includes local variables
878   // and formal parameters).
879   printDatum(J, "#variables processed by location statistics",
880              LocStats.NumVarParam);
881   printLocationStats(J, "#variables", LocStats.VarParamLocStats);
882   printLocationStats(J, "#variables - entry values",
883                      LocStats.VarParamNonEntryValLocStats);
884 
885   // Print the location statistics for formal parameters.
886   printDatum(J, "#params processed by location statistics", LocStats.NumParam);
887   printLocationStats(J, "#params", LocStats.ParamLocStats);
888   printLocationStats(J, "#params - entry values",
889                      LocStats.ParamNonEntryValLocStats);
890 
891   // Print the location statistics for local variables.
892   printDatum(J, "#local vars processed by location statistics",
893              LocStats.NumVar);
894   printLocationStats(J, "#local vars", LocStats.LocalVarLocStats);
895   printLocationStats(J, "#local vars - entry values",
896                      LocStats.LocalVarNonEntryValLocStats);
897   J.objectEnd();
898   OS << '\n';
899   LLVM_DEBUG(
900       llvm::dbgs() << "Total Availability: "
901                    << (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal)
902                    << "%\n";
903       llvm::dbgs() << "PC Ranges covered: "
904                    << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
905                                       GlobalStats.ScopeBytes)
906                    << "%\n");
907   return true;
908 }
909