1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfiledBinary.h"
10 #include "ErrorHandling.h"
11 #include "MissingFrameInferrer.h"
12 #include "ProfileGenerator.h"
13 #include "llvm/ADT/Triple.h"
14 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
15 #include "llvm/Demangle/Demangle.h"
16 #include "llvm/IR/DebugInfoMetadata.h"
17 #include "llvm/MC/TargetRegistry.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Format.h"
21 #include "llvm/Support/TargetSelect.h"
22 #include <optional>
23 
24 #define DEBUG_TYPE "load-binary"
25 
26 using namespace llvm;
27 using namespace sampleprof;
28 
// Command-line options read while loading and disassembling a binary.
// ShowDisassemblyOnly and ShowSourceLocations are not declared static, so they
// have external linkage; the remaining options are private to this file.

// When set, load() decodes pseudo probes up front and the disassembly of the
// text sections is printed to outs().
cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only",
                                  cl::desc("Print disassembled code."));

// When set (and disassembly is being printed), each printed instruction is
// followed by its symbolized source location.
cl::opt<bool> ShowSourceLocations("show-source-locations",
                                  cl::desc("Print source locations."));

// Selects the canonical form of a symbol name for printing and for matching
// against the -disassemble-functions filter.
static cl::opt<bool>
    ShowCanonicalFnName("show-canonical-fname",
                        cl::desc("Print canonical function name."));

// Enables printing of the decoded probe descriptors and of the probes attached
// to each disassembled address.
static cl::opt<bool> ShowPseudoProbe(
    "show-pseudo-probe",
    cl::desc("Print pseudo probe section and disassembled info."));

// When set, checkPseudoProbe() returns before inspecting any section, so
// UsePseudoProbes is never turned on by it.
static cl::opt<bool> UseDwarfCorrelation(
    "use-dwarf-correlation",
    cl::desc("Use dwarf for profile correlation even when binary contains "
             "pseudo probe."));

// Location of the .dwp file; defaults to the empty string. Not referenced in
// the portion of the file shown here.
static cl::opt<std::string>
    DWPPath("dwp", cl::init(""),
            cl::desc("Path of .dwp file. When not specified, it will be "
                     "<binary>.dwp in the same directory as the main binary."));

// Comma-separated function names; load() copies them into
// DisassembleFunctionSet.
static cl::list<std::string> DisassembleFunctions(
    "disassemble-functions", cl::CommaSeparated,
    cl::desc("List of functions to print disassembly for. Accept demangled "
             "names only. Only work with show-disassembly-only"));

// Options declared here but defined in another translation unit.
extern cl::opt<bool> ShowDetailedWarning;
extern cl::opt<bool> InferMissingFrames;
60 
61 namespace llvm {
62 namespace sampleprof {
63 
getTarget(const ObjectFile * Obj)64 static const Target *getTarget(const ObjectFile *Obj) {
65   Triple TheTriple = Obj->makeTriple();
66   std::string Error;
67   std::string ArchName;
68   const Target *TheTarget =
69       TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
70   if (!TheTarget)
71     exitWithError(Error, Obj->getFileName());
72   return TheTarget;
73 }
74 
addInstructionForContext(const SampleContextFrameVector & Context,uint32_t InstrSize)75 void BinarySizeContextTracker::addInstructionForContext(
76     const SampleContextFrameVector &Context, uint32_t InstrSize) {
77   ContextTrieNode *CurNode = &RootContext;
78   bool IsLeaf = true;
79   for (const auto &Callsite : reverse(Context)) {
80     StringRef CallerName = Callsite.FuncName;
81     LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
82     CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
83     IsLeaf = false;
84   }
85 
86   CurNode->addFunctionSize(InstrSize);
87 }
88 
89 uint32_t
getFuncSizeForContext(const ContextTrieNode * Node)90 BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) {
91   ContextTrieNode *CurrNode = &RootContext;
92   ContextTrieNode *PrevNode = nullptr;
93 
94   std::optional<uint32_t> Size;
95 
96   // Start from top-level context-less function, traverse down the reverse
97   // context trie to find the best/longest match for given context, then
98   // retrieve the size.
99   LineLocation CallSiteLoc(0, 0);
100   while (CurrNode && Node->getParentContext() != nullptr) {
101     PrevNode = CurrNode;
102     CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName());
103     if (CurrNode && CurrNode->getFunctionSize())
104       Size = *CurrNode->getFunctionSize();
105     CallSiteLoc = Node->getCallSiteLoc();
106     Node = Node->getParentContext();
107   }
108 
109   // If we traversed all nodes along the path of the context and haven't
110   // found a size yet, pivot to look for size from sibling nodes, i.e size
111   // of inlinee under different context.
112   if (!Size) {
113     if (!CurrNode)
114       CurrNode = PrevNode;
115     while (!Size && CurrNode && !CurrNode->getAllChildContext().empty()) {
116       CurrNode = &CurrNode->getAllChildContext().begin()->second;
117       if (CurrNode->getFunctionSize())
118         Size = *CurrNode->getFunctionSize();
119     }
120   }
121 
122   assert(Size && "We should at least find one context size.");
123   return *Size;
124 }
125 
trackInlineesOptimizedAway(MCPseudoProbeDecoder & ProbeDecoder)126 void BinarySizeContextTracker::trackInlineesOptimizedAway(
127     MCPseudoProbeDecoder &ProbeDecoder) {
128   ProbeFrameStack ProbeContext;
129   for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
130     trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext);
131 }
132 
trackInlineesOptimizedAway(MCPseudoProbeDecoder & ProbeDecoder,MCDecodedPseudoProbeInlineTree & ProbeNode,ProbeFrameStack & ProbeContext)133 void BinarySizeContextTracker::trackInlineesOptimizedAway(
134     MCPseudoProbeDecoder &ProbeDecoder,
135     MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
136   StringRef FuncName =
137       ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
138   ProbeContext.emplace_back(FuncName, 0);
139 
140   // This ProbeContext has a probe, so it has code before inlining and
141   // optimization. Make sure we mark its size as known.
142   if (!ProbeNode.getProbes().empty()) {
143     ContextTrieNode *SizeContext = &RootContext;
144     for (auto &ProbeFrame : reverse(ProbeContext)) {
145       StringRef CallerName = ProbeFrame.first;
146       LineLocation CallsiteLoc(ProbeFrame.second, 0);
147       SizeContext =
148           SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName);
149     }
150     // Add 0 size to make known.
151     SizeContext->addFunctionSize(0);
152   }
153 
154   // DFS down the probe inline tree
155   for (const auto &ChildNode : ProbeNode.getChildren()) {
156     InlineSite Location = ChildNode.first;
157     ProbeContext.back().second = std::get<1>(Location);
158     trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(),
159                                ProbeContext);
160   }
161 
162   ProbeContext.pop_back();
163 }
164 
ProfiledBinary(const StringRef ExeBinPath,const StringRef DebugBinPath)165 ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath,
166                              const StringRef DebugBinPath)
167     : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
168       TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) {
169   // Point to executable binary if debug info binary is not specified.
170   SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
171   setupSymbolizer();
172   if (InferMissingFrames)
173     MissingContextInferrer = std::make_unique<MissingFrameInferrer>(this);
174   load();
175 }
176 
~ProfiledBinary()177 ProfiledBinary::~ProfiledBinary() {}
178 
warnNoFuncEntry()179 void ProfiledBinary::warnNoFuncEntry() {
180   uint64_t NoFuncEntryNum = 0;
181   for (auto &F : BinaryFunctions) {
182     if (F.second.Ranges.empty())
183       continue;
184     bool hasFuncEntry = false;
185     for (auto &R : F.second.Ranges) {
186       if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) {
187         if (FR->IsFuncEntry) {
188           hasFuncEntry = true;
189           break;
190         }
191       }
192     }
193 
194     if (!hasFuncEntry) {
195       NoFuncEntryNum++;
196       if (ShowDetailedWarning)
197         WithColor::warning()
198             << "Failed to determine function entry for " << F.first
199             << " due to inconsistent name from symbol table and dwarf info.\n";
200     }
201   }
202   emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
203                      "of functions failed to determine function entry due to "
204                      "inconsistent name from symbol table and dwarf info.");
205 }
206 
load()207 void ProfiledBinary::load() {
208   // Attempt to open the binary.
209   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
210   Binary &ExeBinary = *OBinary.getBinary();
211 
212   auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
213   if (!Obj)
214     exitWithError("not a valid Elf image", Path);
215 
216   TheTriple = Obj->makeTriple();
217   // Current only support X86
218   if (!TheTriple.isX86())
219     exitWithError("unsupported target", TheTriple.getTriple());
220   LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
221 
222   // Find the preferred load address for text sections.
223   setPreferredTextSegmentAddresses(Obj);
224 
225   // Load debug info of subprograms from DWARF section.
226   // If path of debug info binary is specified, use the debug info from it,
227   // otherwise use the debug info from the executable binary.
228   if (!DebugBinaryPath.empty()) {
229     OwningBinary<Binary> DebugPath =
230         unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
231     loadSymbolsFromDWARF(*cast<ObjectFile>(DebugPath.getBinary()));
232   } else {
233     loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary));
234   }
235 
236   DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
237                                 DisassembleFunctions.end());
238 
239   checkPseudoProbe(Obj);
240 
241   if (UsePseudoProbes)
242     populateElfSymbolAddressList(Obj);
243 
244   if (ShowDisassemblyOnly)
245     decodePseudoProbe(Obj);
246 
247   // Disassemble the text sections.
248   disassemble(Obj);
249 
250   // Use function start and return address to infer prolog and epilog
251   ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap);
252   ProEpilogTracker.inferEpilogAddresses(RetAddressSet);
253 
254   warnNoFuncEntry();
255 
256   // TODO: decode other sections.
257 }
258 
inlineContextEqual(uint64_t Address1,uint64_t Address2)259 bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
260   const SampleContextFrameVector &Context1 =
261       getCachedFrameLocationStack(Address1);
262   const SampleContextFrameVector &Context2 =
263       getCachedFrameLocationStack(Address2);
264   if (Context1.size() != Context2.size())
265     return false;
266   if (Context1.empty())
267     return false;
268   // The leaf frame contains location within the leaf, and it
269   // needs to be remove that as it's not part of the calling context
270   return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
271                     Context2.begin(), Context2.begin() + Context2.size() - 1);
272 }
273 
274 SampleContextFrameVector
getExpandedContext(const SmallVectorImpl<uint64_t> & Stack,bool & WasLeafInlined)275 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
276                                    bool &WasLeafInlined) {
277   SampleContextFrameVector ContextVec;
278   if (Stack.empty())
279     return ContextVec;
280   // Process from frame root to leaf
281   for (auto Address : Stack) {
282     const SampleContextFrameVector &ExpandedContext =
283         getCachedFrameLocationStack(Address);
284     // An instruction without a valid debug line will be ignored by sample
285     // processing
286     if (ExpandedContext.empty())
287       return SampleContextFrameVector();
288     // Set WasLeafInlined to the size of inlined frame count for the last
289     // address which is leaf
290     WasLeafInlined = (ExpandedContext.size() > 1);
291     ContextVec.append(ExpandedContext);
292   }
293 
294   // Replace with decoded base discriminator
295   for (auto &Frame : ContextVec) {
296     Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
297         Frame.Location.Discriminator, UseFSDiscriminator);
298   }
299 
300   assert(ContextVec.size() && "Context length should be at least 1");
301 
302   // Compress the context string except for the leaf frame
303   auto LeafFrame = ContextVec.back();
304   LeafFrame.Location = LineLocation(0, 0);
305   ContextVec.pop_back();
306   CSProfileGenerator::compressRecursionContext(ContextVec);
307   CSProfileGenerator::trimContext(ContextVec);
308   ContextVec.push_back(LeafFrame);
309   return ContextVec;
310 }
311 
312 template <class ELFT>
setPreferredTextSegmentAddresses(const ELFFile<ELFT> & Obj,StringRef FileName)313 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
314                                                       StringRef FileName) {
315   const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
316   // FIXME: This should be the page size of the system running profiling.
317   // However such info isn't available at post-processing time, assuming
318   // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
319   // because we may build the tools on non-linux.
320   uint32_t PageSize = 0x1000;
321   for (const typename ELFT::Phdr &Phdr : PhdrRange) {
322     if (Phdr.p_type == ELF::PT_LOAD) {
323       if (!FirstLoadableAddress)
324         FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
325       if (Phdr.p_flags & ELF::PF_X) {
326         // Segments will always be loaded at a page boundary.
327         PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
328                                                 ~(PageSize - 1U));
329         TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
330       }
331     }
332   }
333 
334   if (PreferredTextSegmentAddresses.empty())
335     exitWithError("no executable segment found", FileName);
336 }
337 
setPreferredTextSegmentAddresses(const ELFObjectFileBase * Obj)338 void ProfiledBinary::setPreferredTextSegmentAddresses(
339     const ELFObjectFileBase *Obj) {
340   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
341     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
342   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
343     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
344   else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
345     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
346   else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
347     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
348   else
349     llvm_unreachable("invalid ELF object format");
350 }
351 
checkPseudoProbe(const ELFObjectFileBase * Obj)352 void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
353   if (UseDwarfCorrelation)
354     return;
355 
356   bool HasProbeDescSection = false;
357   bool HasPseudoProbeSection = false;
358 
359   StringRef FileName = Obj->getFileName();
360   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
361        SI != SE; ++SI) {
362     const SectionRef &Section = *SI;
363     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
364     if (SectionName == ".pseudo_probe_desc") {
365       HasProbeDescSection = true;
366     } else if (SectionName == ".pseudo_probe") {
367       HasPseudoProbeSection = true;
368     }
369   }
370 
371   // set UsePseudoProbes flag, used for PerfReader
372   UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
373 }
374 
/// Decodes the ".pseudo_probe_desc" and ".pseudo_probe" sections of \p Obj
/// into ProbeDecoder. Returns immediately when UsePseudoProbes is off.
///
/// Which functions get decoded is narrowed by a GUID filter and a map of
/// function start addresses, both built below from either the requested
/// disassembly functions or the profiled functions. Exits with an error when
/// the decoder rejects either section.
void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
  if (!UsePseudoProbes)
    return;

  MCPseudoProbeDecoder::Uint64Set GuidFilter;
  MCPseudoProbeDecoder::Uint64Map FuncStartAddresses;
  if (ShowDisassemblyOnly) {
    if (DisassembleFunctionSet.empty()) {
      // No function filter given: hand over every known symbol start address
      // and leave the GUID filter empty.
      FuncStartAddresses = SymbolStartAddrs;
    } else {
      for (auto &F : DisassembleFunctionSet) {
        auto GUID = Function::getGUID(F.first());
        // A requested name with no (non-zero) start address is skipped.
        if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) {
          FuncStartAddresses[GUID] = StartAddr;
          // The filter is keyed on the name of the function range found at
          // that address rather than on the requested name.
          FuncRange &Range = StartAddrToFuncRangeMap[StartAddr];
          GuidFilter.insert(Function::getGUID(Range.getFuncName()));
        }
      }
    }
  } else {
    // Profile generation: restrict decoding to the profiled functions and to
    // every symbol that starts at one of their ranges.
    for (auto *F : ProfiledFunctions) {
      GuidFilter.insert(Function::getGUID(F->FuncName));
      for (auto &Range : F->Ranges) {
        auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
        for (auto I = GUIDs.first; I != GUIDs.second; ++I)
          FuncStartAddresses[I->second] = I->first;
      }
    }
  }

  // Feed the two probe sections to the decoder in the order in which they
  // appear in the section table.
  StringRef FileName = Obj->getFileName();
  for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
       SI != SE; ++SI) {
    const SectionRef &Section = *SI;
    StringRef SectionName = unwrapOrError(Section.getName(), FileName);

    if (SectionName == ".pseudo_probe_desc") {
      StringRef Contents = unwrapOrError(Section.getContents(), FileName);
      if (!ProbeDecoder.buildGUID2FuncDescMap(
              reinterpret_cast<const uint8_t *>(Contents.data()),
              Contents.size()))
        exitWithError(
            "Pseudo Probe decoder fail in .pseudo_probe_desc section");
    } else if (SectionName == ".pseudo_probe") {
      StringRef Contents = unwrapOrError(Section.getContents(), FileName);
      if (!ProbeDecoder.buildAddress2ProbeMap(
              reinterpret_cast<const uint8_t *>(Contents.data()),
              Contents.size(), GuidFilter, FuncStartAddresses))
        exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
    }
  }

  // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
  // is available
  if (TrackFuncContextSize) {
    for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
      auto *Frame = Child.second.get();
      // NOTE(review): the descriptor lookup result is dereferenced without a
      // null check — confirm a descriptor always exists for a decoded frame.
      StringRef FuncName =
          ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
      TopLevelProbeFrameMap[FuncName] = Frame;
    }
  }

  if (ShowPseudoProbe)
    ProbeDecoder.printGUID2FuncDescMap(outs());
}
441 
decodePseudoProbe()442 void ProfiledBinary::decodePseudoProbe() {
443   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
444   Binary &ExeBinary = *OBinary.getBinary();
445   auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
446   decodePseudoProbe(Obj);
447 }
448 
setIsFuncEntry(FuncRange * FuncRange,StringRef RangeSymName)449 void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
450                                     StringRef RangeSymName) {
451   // Skip external function symbol.
452   if (!FuncRange)
453     return;
454 
455   // Set IsFuncEntry to ture if there is only one range in the function or the
456   // RangeSymName from ELF is equal to its DWARF-based function name.
457   if (FuncRange->Func->Ranges.size() == 1 ||
458       (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
459     FuncRange->IsFuncEntry = true;
460 }
461 
/// Disassembles the instructions that belong to the symbol at index \p SI of
/// \p Symbols: everything from that symbol's address up to the address of the
/// next symbol, or up to the end of \p Section for the last symbol. Facts
/// about each decoded instruction (size, call/return/branch kind, possible
/// tail-call edges) are recorded in this object's address tables, and the
/// disassembly is printed when requested on the command line.
///
/// \param SI index of the symbol to process within \p Symbols.
/// \param Bytes raw contents of \p Section.
/// \param Symbols the symbols of \p Section; the next entry is used as the end
///        bound, so the caller is expected to pass them sorted by address.
/// \param Section the section containing the symbol.
/// \returns true; there is no failure path in this function.
bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                                        SectionSymbolsTy &Symbols,
                                        const SectionRef &Section) {
  std::size_t SE = Symbols.size();
  uint64_t SectionAddress = Section.getAddress();
  uint64_t SectSize = Section.getSize();
  uint64_t StartAddress = Symbols[SI].Addr;
  uint64_t NextStartAddress =
      (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize;
  // FRange is null when no known function range covers this symbol.
  FuncRange *FRange = findFuncRange(StartAddress);
  setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
  StringRef SymbolName =
      ShowCanonicalFnName
          ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
          : Symbols[SI].Name;
  // Print only in disassembly-only mode, and only for the requested functions
  // when a function filter was given.
  bool ShowDisassembly =
      ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
                              DisassembleFunctionSet.count(SymbolName));
  if (ShowDisassembly)
    outs() << '<' << SymbolName << ">:\n";

  // Reports the inclusive address range [Start, End] of undecodable bytes.
  auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
    WithColor::warning() << "Invalid instructions at "
                         << format("%8" PRIx64, Start) << " - "
                         << format("%8" PRIx64, End) << "\n";
  };

  uint64_t Address = StartAddress;
  // Size of a consecutive invalid instruction range starting from Address -1
  // backwards.
  uint64_t InvalidInstLength = 0;
  while (Address < NextStartAddress) {
    MCInst Inst;
    uint64_t Size;
    // Disassemble an instruction.
    bool Disassembled = DisAsm->getInstruction(
        Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls());
    // Always advance by at least one byte so the loop makes progress.
    if (Size == 0)
      Size = 1;

    if (ShowDisassembly) {
      if (ShowPseudoProbe) {
        ProbeDecoder.printProbeForAddress(outs(), Address);
      }
      outs() << format("%8" PRIx64 ":", Address);
      size_t Start = outs().tell();
      if (Disassembled)
        IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), outs());
      else
        outs() << "\t<unknown>";
      if (ShowSourceLocations) {
        // Pad the instruction text to 40 columns before the source location.
        unsigned Cur = outs().tell() - Start;
        if (Cur < 40)
          outs().indent(40 - Cur);
        InstructionPointer IP(this, Address);
        outs() << getReversedLocWithContext(
            symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
      }
      outs() << "\n";
    }

    if (Disassembled) {
      const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());

      // Record instruction size.
      AddressToInstSizeMap[Address] = Size;

      // Populate address maps.
      CodeAddressVec.push_back(Address);
      if (MCDesc.isCall()) {
        CallAddressSet.insert(Address);
        UncondBranchAddrSet.insert(Address);
      } else if (MCDesc.isReturn()) {
        RetAddressSet.insert(Address);
        UncondBranchAddrSet.insert(Address);
      } else if (MCDesc.isBranch()) {
        if (MCDesc.isUnconditionalBranch())
          UncondBranchAddrSet.insert(Address);
        BranchAddressSet.insert(Address);
      }

      // Record potential call targets for tail frame inference later-on.
      if (InferMissingFrames && FRange) {
        // Target stays zero unless evaluateBranch stores a value into it; the
        // call's return value is not inspected.
        uint64_t Target = 0;
        MIA->evaluateBranch(Inst, Address, Size, Target);
        if (MCDesc.isCall()) {
          // Indirect call targets are unknown at this point. Recording the
          // unknown target (zero) for further LBR-based refinement.
          MissingContextInferrer->CallEdges[Address].insert(Target);
        } else if (MCDesc.isUnconditionalBranch()) {
          assert(Target &&
                 "target should be known for unconditional direct branch");
          // Any inter-function unconditional jump is considered tail call at
          // this point. This is not 100% accurate and could further be
          // optimized based on some source annotation.
          FuncRange *ToFRange = findFuncRange(Target);
          if (ToFRange && ToFRange->Func != FRange->Func)
            MissingContextInferrer->TailCallEdges[Address].insert(Target);
          LLVM_DEBUG({
            dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address);
            IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
            dbgs() << "\n";
          });
        } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) {
          // This is an indirect branch but not necessarily an indirect tail
          // call. The isBarrier check is to filter out conditional branch.
          // Similar with indirect call targets, recording the unknown target
          // (zero) for further LBR-based refinement.
          MissingContextInferrer->TailCallEdges[Address].insert(Target);
          LLVM_DEBUG({
            dbgs() << "Indirect Tail call: "
                   << format("%8" PRIx64 ":", Address);
            IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
            dbgs() << "\n";
          });
        }
      }

      // A valid instruction ends any pending run of invalid bytes; report the
      // run and start over.
      if (InvalidInstLength) {
        WarnInvalidInsts(Address - InvalidInstLength, Address - 1);
        InvalidInstLength = 0;
      }
    } else {
      InvalidInstLength += Size;
    }

    Address += Size;
  }

  // Report a run of invalid bytes that extends to the end of the symbol.
  if (InvalidInstLength)
    WarnInvalidInsts(Address - InvalidInstLength, Address - 1);

  if (ShowDisassembly)
    outs() << "\n";

  return true;
}
599 
setUpDisassembler(const ELFObjectFileBase * Obj)600 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
601   const Target *TheTarget = getTarget(Obj);
602   std::string TripleName = TheTriple.getTriple();
603   StringRef FileName = Obj->getFileName();
604 
605   MRI.reset(TheTarget->createMCRegInfo(TripleName));
606   if (!MRI)
607     exitWithError("no register info for target " + TripleName, FileName);
608 
609   MCTargetOptions MCOptions;
610   AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
611   if (!AsmInfo)
612     exitWithError("no assembly info for target " + TripleName, FileName);
613 
614   Expected<SubtargetFeatures> Features = Obj->getFeatures();
615   if (!Features)
616     exitWithError(Features.takeError(), FileName);
617   STI.reset(
618       TheTarget->createMCSubtargetInfo(TripleName, "", Features->getString()));
619   if (!STI)
620     exitWithError("no subtarget info for target " + TripleName, FileName);
621 
622   MII.reset(TheTarget->createMCInstrInfo());
623   if (!MII)
624     exitWithError("no instruction info for target " + TripleName, FileName);
625 
626   MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
627   std::unique_ptr<MCObjectFileInfo> MOFI(
628       TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
629   Ctx.setObjectFileInfo(MOFI.get());
630   DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
631   if (!DisAsm)
632     exitWithError("no disassembler for target " + TripleName, FileName);
633 
634   MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
635 
636   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
637   IPrinter.reset(TheTarget->createMCInstPrinter(
638       Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
639   IPrinter->setPrintBranchImmAsAddress(true);
640 }
641 
disassemble(const ELFObjectFileBase * Obj)642 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
643   // Set up disassembler and related components.
644   setUpDisassembler(Obj);
645 
646   // Create a mapping from virtual address to symbol name. The symbols in text
647   // sections are the candidates to dissassemble.
648   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
649   StringRef FileName = Obj->getFileName();
650   for (const SymbolRef &Symbol : Obj->symbols()) {
651     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
652     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
653     section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
654     if (SecI != Obj->section_end())
655       AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
656   }
657 
658   // Sort all the symbols. Use a stable sort to stabilize the output.
659   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
660     stable_sort(SecSyms.second);
661 
662   assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
663          "Functions to disassemble should be only specified together with "
664          "--show-disassembly-only");
665 
666   if (ShowDisassemblyOnly)
667     outs() << "\nDisassembly of " << FileName << ":\n";
668 
669   // Dissassemble a text section.
670   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
671        SI != SE; ++SI) {
672     const SectionRef &Section = *SI;
673     if (!Section.isText())
674       continue;
675 
676     uint64_t ImageLoadAddr = getPreferredBaseAddress();
677     uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr;
678     uint64_t SectSize = Section.getSize();
679     if (!SectSize)
680       continue;
681 
682     // Register the text section.
683     TextSections.insert({SectionAddress, SectSize});
684 
685     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
686 
687     if (ShowDisassemblyOnly) {
688       outs() << "\nDisassembly of section " << SectionName;
689       outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
690              << format("0x%" PRIx64, Section.getAddress() + SectSize)
691              << "]:\n\n";
692     }
693 
694     if (SectionName == ".plt")
695       continue;
696 
697     // Get the section data.
698     ArrayRef<uint8_t> Bytes =
699         arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
700 
701     // Get the list of all the symbols in this section.
702     SectionSymbolsTy &Symbols = AllSymbols[Section];
703 
704     // Disassemble symbol by symbol.
705     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
706       if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
707         exitWithError("disassembling error", FileName);
708     }
709   }
710 
711   // Dissassemble rodata section to check if FS discriminator symbol exists.
712   checkUseFSDiscriminator(Obj, AllSymbols);
713 }
714 
checkUseFSDiscriminator(const ELFObjectFileBase * Obj,std::map<SectionRef,SectionSymbolsTy> & AllSymbols)715 void ProfiledBinary::checkUseFSDiscriminator(
716     const ELFObjectFileBase *Obj,
717     std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
718   const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
719   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
720        SI != SE; ++SI) {
721     const SectionRef &Section = *SI;
722     if (!Section.isData() || Section.getSize() == 0)
723       continue;
724     SectionSymbolsTy &Symbols = AllSymbols[Section];
725 
726     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
727       if (Symbols[SI].Name == FSDiscriminatorVar) {
728         UseFSDiscriminator = true;
729         return;
730       }
731     }
732   }
733 }
734 
populateElfSymbolAddressList(const ELFObjectFileBase * Obj)735 void ProfiledBinary::populateElfSymbolAddressList(
736     const ELFObjectFileBase *Obj) {
737   // Create a mapping from virtual address to symbol GUID and the other way
738   // around.
739   StringRef FileName = Obj->getFileName();
740   for (const SymbolRef &Symbol : Obj->symbols()) {
741     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
742     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
743     uint64_t GUID = Function::getGUID(Name);
744     SymbolStartAddrs[GUID] = Addr;
745     StartAddrToSymMap.emplace(Addr, GUID);
746   }
747 }
748 
loadSymbolsFromDWARFUnit(DWARFUnit & CompilationUnit)749 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
750   for (const auto &DieInfo : CompilationUnit.dies()) {
751     llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
752 
753     if (!Die.isSubprogramDIE())
754       continue;
755     auto Name = Die.getName(llvm::DINameKind::LinkageName);
756     if (!Name)
757       Name = Die.getName(llvm::DINameKind::ShortName);
758     if (!Name)
759       continue;
760 
761     auto RangesOrError = Die.getAddressRanges();
762     if (!RangesOrError)
763       continue;
764     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
765 
766     if (Ranges.empty())
767       continue;
768 
769     // Different DWARF symbols can have same function name, search or create
770     // BinaryFunction indexed by the name.
771     auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
772     auto &Func = Ret.first->second;
773     if (Ret.second)
774       Func.FuncName = Ret.first->first;
775 
776     for (const auto &Range : Ranges) {
777       uint64_t StartAddress = Range.LowPC;
778       uint64_t EndAddress = Range.HighPC;
779 
780       if (EndAddress <= StartAddress ||
781           StartAddress < getPreferredBaseAddress())
782         continue;
783 
784       // We may want to know all ranges for one function. Here group the
785       // ranges and store them into BinaryFunction.
786       Func.Ranges.emplace_back(StartAddress, EndAddress);
787 
788       auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange());
789       if (R.second) {
790         FuncRange &FRange = R.first->second;
791         FRange.Func = &Func;
792         FRange.StartAddress = StartAddress;
793         FRange.EndAddress = EndAddress;
794       } else {
795         WithColor::warning()
796             << "Duplicated symbol start address at "
797             << format("%8" PRIx64, StartAddress) << " "
798             << R.first->second.getFuncName() << " and " << Name << "\n";
799       }
800     }
801   }
802 }
803 
loadSymbolsFromDWARF(ObjectFile & Obj)804 void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
805   auto DebugContext = llvm::DWARFContext::create(
806       Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath);
807   if (!DebugContext)
808     exitWithError("Error creating the debug info context", Path);
809 
810   for (const auto &CompilationUnit : DebugContext->compile_units())
811     loadSymbolsFromDWARFUnit(*CompilationUnit.get());
812 
813   // Handles DWO sections that can either be in .o, .dwo or .dwp files.
814   for (const auto &CompilationUnit : DebugContext->compile_units()) {
815     DWARFUnit *const DwarfUnit = CompilationUnit.get();
816     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
817       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
818       if (!DWOCU->isDWOUnit()) {
819         std::string DWOName = dwarf::toString(
820             DwarfUnit->getUnitDIE().find(
821                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
822             "");
823         WithColor::warning()
824             << "DWO debug information for " << DWOName
825             << " was not loaded. Please check the .o, .dwo or .dwp path.\n";
826         continue;
827       }
828       loadSymbolsFromDWARFUnit(*DWOCU);
829     }
830   }
831 
832   if (BinaryFunctions.empty())
833     WithColor::warning() << "Loading of DWARF info completed, but no binary "
834                             "functions have been retrieved.\n";
835 }
836 
populateSymbolListFromDWARF(ProfileSymbolList & SymbolList)837 void ProfiledBinary::populateSymbolListFromDWARF(
838     ProfileSymbolList &SymbolList) {
839   for (auto &I : StartAddrToFuncRangeMap)
840     SymbolList.add(I.second.getFuncName());
841 }
842 
setupSymbolizer()843 void ProfiledBinary::setupSymbolizer() {
844   symbolize::LLVMSymbolizer::Options SymbolizerOpts;
845   SymbolizerOpts.PrintFunctions =
846       DILineInfoSpecifier::FunctionNameKind::LinkageName;
847   SymbolizerOpts.Demangle = false;
848   SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
849   SymbolizerOpts.UseSymbolTable = false;
850   SymbolizerOpts.RelativeAddresses = false;
851   SymbolizerOpts.DWPName = DWPPath;
852   Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
853 }
854 
symbolize(const InstructionPointer & IP,bool UseCanonicalFnName,bool UseProbeDiscriminator)855 SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
856                                                    bool UseCanonicalFnName,
857                                                    bool UseProbeDiscriminator) {
858   assert(this == IP.Binary &&
859          "Binary should only symbolize its own instruction");
860   auto Addr = object::SectionedAddress{IP.Address,
861                                        object::SectionedAddress::UndefSection};
862   DIInliningInfo InlineStack = unwrapOrError(
863       Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
864       SymbolizerPath);
865 
866   SampleContextFrameVector CallStack;
867   for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
868     const auto &CallerFrame = InlineStack.getFrame(I);
869     if (CallerFrame.FunctionName == "<invalid>")
870       break;
871 
872     StringRef FunctionName(CallerFrame.FunctionName);
873     if (UseCanonicalFnName)
874       FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
875 
876     uint32_t Discriminator = CallerFrame.Discriminator;
877     uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
878     if (UseProbeDiscriminator) {
879       LineOffset =
880           PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
881       Discriminator = 0;
882     }
883 
884     LineLocation Line(LineOffset, Discriminator);
885     auto It = NameStrings.insert(FunctionName.str());
886     CallStack.emplace_back(*It.first, Line);
887   }
888 
889   return CallStack;
890 }
891 
computeInlinedContextSizeForRange(uint64_t RangeBegin,uint64_t RangeEnd)892 void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin,
893                                                        uint64_t RangeEnd) {
894   InstructionPointer IP(this, RangeBegin, true);
895 
896   if (IP.Address != RangeBegin)
897     WithColor::warning() << "Invalid start instruction at "
898                          << format("%8" PRIx64, RangeBegin) << "\n";
899 
900   if (IP.Address >= RangeEnd)
901     return;
902 
903   do {
904     const SampleContextFrameVector SymbolizedCallStack =
905         getFrameLocationStack(IP.Address, UsePseudoProbes);
906     uint64_t Size = AddressToInstSizeMap[IP.Address];
907     // Record instruction size for the corresponding context
908     FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
909 
910   } while (IP.advance() && IP.Address < RangeEnd);
911 }
912 
computeInlinedContextSizeForFunc(const BinaryFunction * Func)913 void ProfiledBinary::computeInlinedContextSizeForFunc(
914     const BinaryFunction *Func) {
915   // Note that a function can be spilt into multiple ranges, so compute for all
916   // ranges of the function.
917   for (const auto &Range : Func->Ranges)
918     computeInlinedContextSizeForRange(Range.first, Range.second);
919 
920   // Track optimized-away inlinee for probed binary. A function inlined and then
921   // optimized away should still have their probes left over in places.
922   if (usePseudoProbes()) {
923     auto I = TopLevelProbeFrameMap.find(Func->FuncName);
924     if (I != TopLevelProbeFrameMap.end()) {
925       BinarySizeContextTracker::ProbeFrameStack ProbeContext;
926       FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
927                                                  ProbeContext);
928     }
929   }
930 }
931 
inferMissingFrames(const SmallVectorImpl<uint64_t> & Context,SmallVectorImpl<uint64_t> & NewContext)932 void ProfiledBinary::inferMissingFrames(
933     const SmallVectorImpl<uint64_t> &Context,
934     SmallVectorImpl<uint64_t> &NewContext) {
935   MissingContextInferrer->inferMissingFrames(Context, NewContext);
936 }
937 
InstructionPointer(const ProfiledBinary * Binary,uint64_t Address,bool RoundToNext)938 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
939                                        uint64_t Address, bool RoundToNext)
940     : Binary(Binary), Address(Address) {
941   Index = Binary->getIndexForAddr(Address);
942   if (RoundToNext) {
943     // we might get address which is not the code
944     // it should round to the next valid address
945     if (Index >= Binary->getCodeAddrVecSize())
946       this->Address = UINT64_MAX;
947     else
948       this->Address = Binary->getAddressforIndex(Index);
949   }
950 }
951 
advance()952 bool InstructionPointer::advance() {
953   Index++;
954   if (Index >= Binary->getCodeAddrVecSize()) {
955     Address = UINT64_MAX;
956     return false;
957   }
958   Address = Binary->getAddressforIndex(Index);
959   return true;
960 }
961 
backward()962 bool InstructionPointer::backward() {
963   if (Index == 0) {
964     Address = 0;
965     return false;
966   }
967   Index--;
968   Address = Binary->getAddressforIndex(Index);
969   return true;
970 }
971 
update(uint64_t Addr)972 void InstructionPointer::update(uint64_t Addr) {
973   Address = Addr;
974   Index = Binary->getIndexForAddr(Address);
975 }
976 
977 } // end namespace sampleprof
978 } // end namespace llvm
979