1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ProfiledBinary.h"
10 #include "ErrorHandling.h"
11 #include "MissingFrameInferrer.h"
12 #include "ProfileGenerator.h"
13 #include "llvm/ADT/Triple.h"
14 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
15 #include "llvm/Demangle/Demangle.h"
16 #include "llvm/IR/DebugInfoMetadata.h"
17 #include "llvm/MC/TargetRegistry.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Format.h"
21 #include "llvm/Support/TargetSelect.h"
22 #include <optional>
23
24 #define DEBUG_TYPE "load-binary"
25
26 using namespace llvm;
27 using namespace sampleprof;
28
// Command-line options consumed by this file.
//
// The first two options have external linkage (no `static`); the remaining
// ones are private to this translation unit.

// When set, load() also decodes pseudo probes before disassembly and the
// disassembler prints each decoded instruction.
cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only",
                                  cl::desc("Print disassembled code."));

// When set (together with disassembly printing), each printed instruction is
// followed by its symbolized source location.
cl::opt<bool> ShowSourceLocations("show-source-locations",
                                  cl::desc("Print source locations."));

// Selects the canonical form of symbol names in the disassembly output.
static cl::opt<bool>
    ShowCanonicalFnName("show-canonical-fname",
                        cl::desc("Print canonical function name."));

// Enables printing of the GUID-to-function-descriptor map and of the probes
// attached to each disassembled address.
static cl::opt<bool> ShowPseudoProbe(
    "show-pseudo-probe",
    cl::desc("Print pseudo probe section and disassembled info."));

// When set, checkPseudoProbe() returns early and never enables pseudo probes.
static cl::opt<bool> UseDwarfCorrelation(
    "use-dwarf-correlation",
    cl::desc("Use dwarf for profile correlation even when binary contains "
             "pseudo probe."));

// Forwarded to DWARFContext::create() and to the symbolizer options.
static cl::opt<std::string>
    DWPPath("dwp", cl::init(""),
            cl::desc("Path of .dwp file. When not specified, it will be "
                     "<binary>.dwp in the same directory as the main binary."));

// Names restricting which symbols are printed/decoded in disassembly mode.
static cl::list<std::string> DisassembleFunctions(
    "disassemble-functions", cl::CommaSeparated,
    cl::desc("List of functions to print disassembly for. Accept demangled "
             "names only. Only work with show-disassembly-only"));

// Options defined in another translation unit.
extern cl::opt<bool> ShowDetailedWarning;
extern cl::opt<bool> InferMissingFrames;
60
61 namespace llvm {
62 namespace sampleprof {
63
getTarget(const ObjectFile * Obj)64 static const Target *getTarget(const ObjectFile *Obj) {
65 Triple TheTriple = Obj->makeTriple();
66 std::string Error;
67 std::string ArchName;
68 const Target *TheTarget =
69 TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
70 if (!TheTarget)
71 exitWithError(Error, Obj->getFileName());
72 return TheTarget;
73 }
74
addInstructionForContext(const SampleContextFrameVector & Context,uint32_t InstrSize)75 void BinarySizeContextTracker::addInstructionForContext(
76 const SampleContextFrameVector &Context, uint32_t InstrSize) {
77 ContextTrieNode *CurNode = &RootContext;
78 bool IsLeaf = true;
79 for (const auto &Callsite : reverse(Context)) {
80 StringRef CallerName = Callsite.FuncName;
81 LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
82 CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
83 IsLeaf = false;
84 }
85
86 CurNode->addFunctionSize(InstrSize);
87 }
88
/// Return the recorded size for the context that \p Node represents.
///
/// The size trie rooted at RootContext is keyed leaf-first, so this walks
/// from \p Node towards its root while descending the size trie, remembering
/// the size of the deepest matching node that has one. If no node on that
/// path has a size, it falls back to following first-child links until a
/// node with a recorded size is found.
///
/// Asserts that some size was found.
uint32_t
BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) {
  ContextTrieNode *CurrNode = &RootContext;
  // Last non-null trie node visited; used as the fallback starting point.
  ContextTrieNode *PrevNode = nullptr;

  std::optional<uint32_t> Size;

  // Start from top-level context-less function, traverse down the reverse
  // context trie to find the best/longest match for given context, then
  // retrieve the size.
  // The first lookup uses location (0, 0), matching how leaf frames are keyed
  // by addInstructionForContext().
  LineLocation CallSiteLoc(0, 0);
  while (CurrNode && Node->getParentContext() != nullptr) {
    PrevNode = CurrNode;
    CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName());
    // A deeper match overrides the size found at a shallower level.
    if (CurrNode && CurrNode->getFunctionSize())
      Size = *CurrNode->getFunctionSize();
    // The next (caller) level is keyed by the call site of the current frame.
    CallSiteLoc = Node->getCallSiteLoc();
    Node = Node->getParentContext();
  }

  // If we traversed all nodes along the path of the context and haven't
  // found a size yet, pivot to look for size from sibling nodes, i.e size
  // of inlinee under different context.
  if (!Size) {
    // The walk above may have stepped off the trie; resume from the last
    // node that existed.
    if (!CurrNode)
      CurrNode = PrevNode;
    // Follow the first child at each level until a node with a size appears.
    while (!Size && CurrNode && !CurrNode->getAllChildContext().empty()) {
      CurrNode = &CurrNode->getAllChildContext().begin()->second;
      if (CurrNode->getFunctionSize())
        Size = *CurrNode->getFunctionSize();
    }
  }

  assert(Size && "We should at least find one context size.");
  return *Size;
}
125
trackInlineesOptimizedAway(MCPseudoProbeDecoder & ProbeDecoder)126 void BinarySizeContextTracker::trackInlineesOptimizedAway(
127 MCPseudoProbeDecoder &ProbeDecoder) {
128 ProbeFrameStack ProbeContext;
129 for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
130 trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext);
131 }
132
trackInlineesOptimizedAway(MCPseudoProbeDecoder & ProbeDecoder,MCDecodedPseudoProbeInlineTree & ProbeNode,ProbeFrameStack & ProbeContext)133 void BinarySizeContextTracker::trackInlineesOptimizedAway(
134 MCPseudoProbeDecoder &ProbeDecoder,
135 MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
136 StringRef FuncName =
137 ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
138 ProbeContext.emplace_back(FuncName, 0);
139
140 // This ProbeContext has a probe, so it has code before inlining and
141 // optimization. Make sure we mark its size as known.
142 if (!ProbeNode.getProbes().empty()) {
143 ContextTrieNode *SizeContext = &RootContext;
144 for (auto &ProbeFrame : reverse(ProbeContext)) {
145 StringRef CallerName = ProbeFrame.first;
146 LineLocation CallsiteLoc(ProbeFrame.second, 0);
147 SizeContext =
148 SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName);
149 }
150 // Add 0 size to make known.
151 SizeContext->addFunctionSize(0);
152 }
153
154 // DFS down the probe inline tree
155 for (const auto &ChildNode : ProbeNode.getChildren()) {
156 InlineSite Location = ChildNode.first;
157 ProbeContext.back().second = std::get<1>(Location);
158 trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(),
159 ProbeContext);
160 }
161
162 ProbeContext.pop_back();
163 }
164
ProfiledBinary(const StringRef ExeBinPath,const StringRef DebugBinPath)165 ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath,
166 const StringRef DebugBinPath)
167 : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
168 TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) {
169 // Point to executable binary if debug info binary is not specified.
170 SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
171 setupSymbolizer();
172 if (InferMissingFrames)
173 MissingContextInferrer = std::make_unique<MissingFrameInferrer>(this);
174 load();
175 }
176
~ProfiledBinary()177 ProfiledBinary::~ProfiledBinary() {}
178
warnNoFuncEntry()179 void ProfiledBinary::warnNoFuncEntry() {
180 uint64_t NoFuncEntryNum = 0;
181 for (auto &F : BinaryFunctions) {
182 if (F.second.Ranges.empty())
183 continue;
184 bool hasFuncEntry = false;
185 for (auto &R : F.second.Ranges) {
186 if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) {
187 if (FR->IsFuncEntry) {
188 hasFuncEntry = true;
189 break;
190 }
191 }
192 }
193
194 if (!hasFuncEntry) {
195 NoFuncEntryNum++;
196 if (ShowDetailedWarning)
197 WithColor::warning()
198 << "Failed to determine function entry for " << F.first
199 << " due to inconsistent name from symbol table and dwarf info.\n";
200 }
201 }
202 emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
203 "of functions failed to determine function entry due to "
204 "inconsistent name from symbol table and dwarf info.");
205 }
206
load()207 void ProfiledBinary::load() {
208 // Attempt to open the binary.
209 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
210 Binary &ExeBinary = *OBinary.getBinary();
211
212 auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
213 if (!Obj)
214 exitWithError("not a valid Elf image", Path);
215
216 TheTriple = Obj->makeTriple();
217 // Current only support X86
218 if (!TheTriple.isX86())
219 exitWithError("unsupported target", TheTriple.getTriple());
220 LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
221
222 // Find the preferred load address for text sections.
223 setPreferredTextSegmentAddresses(Obj);
224
225 // Load debug info of subprograms from DWARF section.
226 // If path of debug info binary is specified, use the debug info from it,
227 // otherwise use the debug info from the executable binary.
228 if (!DebugBinaryPath.empty()) {
229 OwningBinary<Binary> DebugPath =
230 unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
231 loadSymbolsFromDWARF(*cast<ObjectFile>(DebugPath.getBinary()));
232 } else {
233 loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary));
234 }
235
236 DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
237 DisassembleFunctions.end());
238
239 checkPseudoProbe(Obj);
240
241 if (UsePseudoProbes)
242 populateElfSymbolAddressList(Obj);
243
244 if (ShowDisassemblyOnly)
245 decodePseudoProbe(Obj);
246
247 // Disassemble the text sections.
248 disassemble(Obj);
249
250 // Use function start and return address to infer prolog and epilog
251 ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap);
252 ProEpilogTracker.inferEpilogAddresses(RetAddressSet);
253
254 warnNoFuncEntry();
255
256 // TODO: decode other sections.
257 }
258
inlineContextEqual(uint64_t Address1,uint64_t Address2)259 bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
260 const SampleContextFrameVector &Context1 =
261 getCachedFrameLocationStack(Address1);
262 const SampleContextFrameVector &Context2 =
263 getCachedFrameLocationStack(Address2);
264 if (Context1.size() != Context2.size())
265 return false;
266 if (Context1.empty())
267 return false;
268 // The leaf frame contains location within the leaf, and it
269 // needs to be remove that as it's not part of the calling context
270 return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
271 Context2.begin(), Context2.begin() + Context2.size() - 1);
272 }
273
274 SampleContextFrameVector
getExpandedContext(const SmallVectorImpl<uint64_t> & Stack,bool & WasLeafInlined)275 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
276 bool &WasLeafInlined) {
277 SampleContextFrameVector ContextVec;
278 if (Stack.empty())
279 return ContextVec;
280 // Process from frame root to leaf
281 for (auto Address : Stack) {
282 const SampleContextFrameVector &ExpandedContext =
283 getCachedFrameLocationStack(Address);
284 // An instruction without a valid debug line will be ignored by sample
285 // processing
286 if (ExpandedContext.empty())
287 return SampleContextFrameVector();
288 // Set WasLeafInlined to the size of inlined frame count for the last
289 // address which is leaf
290 WasLeafInlined = (ExpandedContext.size() > 1);
291 ContextVec.append(ExpandedContext);
292 }
293
294 // Replace with decoded base discriminator
295 for (auto &Frame : ContextVec) {
296 Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
297 Frame.Location.Discriminator, UseFSDiscriminator);
298 }
299
300 assert(ContextVec.size() && "Context length should be at least 1");
301
302 // Compress the context string except for the leaf frame
303 auto LeafFrame = ContextVec.back();
304 LeafFrame.Location = LineLocation(0, 0);
305 ContextVec.pop_back();
306 CSProfileGenerator::compressRecursionContext(ContextVec);
307 CSProfileGenerator::trimContext(ContextVec);
308 ContextVec.push_back(LeafFrame);
309 return ContextVec;
310 }
311
312 template <class ELFT>
setPreferredTextSegmentAddresses(const ELFFile<ELFT> & Obj,StringRef FileName)313 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
314 StringRef FileName) {
315 const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
316 // FIXME: This should be the page size of the system running profiling.
317 // However such info isn't available at post-processing time, assuming
318 // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
319 // because we may build the tools on non-linux.
320 uint32_t PageSize = 0x1000;
321 for (const typename ELFT::Phdr &Phdr : PhdrRange) {
322 if (Phdr.p_type == ELF::PT_LOAD) {
323 if (!FirstLoadableAddress)
324 FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
325 if (Phdr.p_flags & ELF::PF_X) {
326 // Segments will always be loaded at a page boundary.
327 PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
328 ~(PageSize - 1U));
329 TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
330 }
331 }
332 }
333
334 if (PreferredTextSegmentAddresses.empty())
335 exitWithError("no executable segment found", FileName);
336 }
337
setPreferredTextSegmentAddresses(const ELFObjectFileBase * Obj)338 void ProfiledBinary::setPreferredTextSegmentAddresses(
339 const ELFObjectFileBase *Obj) {
340 if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
341 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
342 else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
343 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
344 else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
345 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
346 else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
347 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
348 else
349 llvm_unreachable("invalid ELF object format");
350 }
351
checkPseudoProbe(const ELFObjectFileBase * Obj)352 void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
353 if (UseDwarfCorrelation)
354 return;
355
356 bool HasProbeDescSection = false;
357 bool HasPseudoProbeSection = false;
358
359 StringRef FileName = Obj->getFileName();
360 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
361 SI != SE; ++SI) {
362 const SectionRef &Section = *SI;
363 StringRef SectionName = unwrapOrError(Section.getName(), FileName);
364 if (SectionName == ".pseudo_probe_desc") {
365 HasProbeDescSection = true;
366 } else if (SectionName == ".pseudo_probe") {
367 HasPseudoProbeSection = true;
368 }
369 }
370
371 // set UsePseudoProbes flag, used for PerfReader
372 UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
373 }
374
/// Decode the pseudo-probe sections of \p Obj into ProbeDecoder.
///
/// Does nothing unless UsePseudoProbes is set. A GUID filter and a map of
/// function start addresses are built first and handed to the decoder when
/// the ".pseudo_probe" section is parsed:
///  - in disassembly-only mode they come from the requested function names
///    (or all symbol start addresses when no names were requested);
///  - otherwise they come from ProfiledFunctions.
/// Exits with an error when either probe section fails to decode.
void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
  if (!UsePseudoProbes)
    return;

  MCPseudoProbeDecoder::Uint64Set GuidFilter;
  MCPseudoProbeDecoder::Uint64Map FuncStartAddresses;
  if (ShowDisassemblyOnly) {
    if (DisassembleFunctionSet.empty()) {
      // No restriction requested: use every known symbol start address and
      // leave the GUID filter empty.
      FuncStartAddresses = SymbolStartAddrs;
    } else {
      for (auto &F : DisassembleFunctionSet) {
        auto GUID = Function::getGUID(F.first());
        // A zero lookup result is treated as "symbol not found".
        if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) {
          FuncStartAddresses[GUID] = StartAddr;
          // NOTE(review): operator[] inserts an entry when StartAddr has no
          // DWARF range; confirm getFuncName() is safe on such an entry.
          FuncRange &Range = StartAddrToFuncRangeMap[StartAddr];
          GuidFilter.insert(Function::getGUID(Range.getFuncName()));
        }
      }
    }
  } else {
    for (auto *F : ProfiledFunctions) {
      GuidFilter.insert(Function::getGUID(F->FuncName));
      // Collect every symbol GUID that starts at one of the function's
      // range start addresses.
      for (auto &Range : F->Ranges) {
        auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
        for (auto I = GUIDs.first; I != GUIDs.second; ++I)
          FuncStartAddresses[I->second] = I->first;
      }
    }
  }

  // Feed the two probe sections to the decoder in section order.
  StringRef FileName = Obj->getFileName();
  for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
       SI != SE; ++SI) {
    const SectionRef &Section = *SI;
    StringRef SectionName = unwrapOrError(Section.getName(), FileName);

    if (SectionName == ".pseudo_probe_desc") {
      StringRef Contents = unwrapOrError(Section.getContents(), FileName);
      if (!ProbeDecoder.buildGUID2FuncDescMap(
              reinterpret_cast<const uint8_t *>(Contents.data()),
              Contents.size()))
        exitWithError(
            "Pseudo Probe decoder fail in .pseudo_probe_desc section");
    } else if (SectionName == ".pseudo_probe") {
      StringRef Contents = unwrapOrError(Section.getContents(), FileName);
      if (!ProbeDecoder.buildAddress2ProbeMap(
              reinterpret_cast<const uint8_t *>(Contents.data()),
              Contents.size(), GuidFilter, FuncStartAddresses))
        exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
    }
  }

  // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
  // is available
  if (TrackFuncContextSize) {
    for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
      auto *Frame = Child.second.get();
      StringRef FuncName =
          ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
      TopLevelProbeFrameMap[FuncName] = Frame;
    }
  }

  if (ShowPseudoProbe)
    ProbeDecoder.printGUID2FuncDescMap(outs());
}
441
decodePseudoProbe()442 void ProfiledBinary::decodePseudoProbe() {
443 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
444 Binary &ExeBinary = *OBinary.getBinary();
445 auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
446 decodePseudoProbe(Obj);
447 }
448
setIsFuncEntry(FuncRange * FuncRange,StringRef RangeSymName)449 void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
450 StringRef RangeSymName) {
451 // Skip external function symbol.
452 if (!FuncRange)
453 return;
454
455 // Set IsFuncEntry to ture if there is only one range in the function or the
456 // RangeSymName from ELF is equal to its DWARF-based function name.
457 if (FuncRange->Func->Ranges.size() == 1 ||
458 (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
459 FuncRange->IsFuncEntry = true;
460 }
461
/// Disassemble the bytes belonging to symbol number \p SI of \p Symbols.
///
/// The symbol spans from its own address up to the address of the next symbol
/// in \p Symbols, or to the end of \p Section for the last symbol. For every
/// successfully decoded instruction the address maps (instruction size, code
/// addresses, call/return/branch sets) are updated and, when missing-frame
/// inference is enabled, call and tail-call edges are recorded. In
/// disassembly-only mode the instructions are also printed.
///
/// \param SI       Index of the symbol within \p Symbols.
/// \param Bytes    Contents of \p Section.
/// \param Symbols  Symbols of \p Section, in address order as sorted by the
///                 caller.
/// \param Section  Section that contains the symbol.
/// \returns Always true.
bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                                        SectionSymbolsTy &Symbols,
                                        const SectionRef &Section) {
  std::size_t SE = Symbols.size();
  uint64_t SectionAddress = Section.getAddress();
  uint64_t SectSize = Section.getSize();
  uint64_t StartAddress = Symbols[SI].Addr;
  // The symbol ends where the next one begins, or at the end of the section.
  uint64_t NextStartAddress =
      (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize;
  FuncRange *FRange = findFuncRange(StartAddress);
  setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
  StringRef SymbolName =
      ShowCanonicalFnName
          ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
          : Symbols[SI].Name;
  // Print only in disassembly-only mode, and only for requested functions
  // when a function list was given.
  bool ShowDisassembly =
      ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
                              DisassembleFunctionSet.count(SymbolName));
  if (ShowDisassembly)
    outs() << '<' << SymbolName << ">:\n";

  // Reports an inclusive address range of bytes that failed to decode.
  auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
    WithColor::warning() << "Invalid instructions at "
                         << format("%8" PRIx64, Start) << " - "
                         << format("%8" PRIx64, End) << "\n";
  };

  uint64_t Address = StartAddress;
  // Size of a consecutive invalid instruction range starting from Address -1
  // backwards.
  uint64_t InvalidInstLength = 0;
  while (Address < NextStartAddress) {
    MCInst Inst;
    uint64_t Size;
    // Disassemble an instruction.
    bool Disassembled = DisAsm->getInstruction(
        Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls());
    // Always advance by at least one byte so the loop makes progress.
    if (Size == 0)
      Size = 1;

    if (ShowDisassembly) {
      if (ShowPseudoProbe) {
        ProbeDecoder.printProbeForAddress(outs(), Address);
      }
      outs() << format("%8" PRIx64 ":", Address);
      size_t Start = outs().tell();
      if (Disassembled)
        IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), outs());
      else
        outs() << "\t<unknown>";
      if (ShowSourceLocations) {
        // Pad the instruction text to 40 columns before the location.
        unsigned Cur = outs().tell() - Start;
        if (Cur < 40)
          outs().indent(40 - Cur);
        InstructionPointer IP(this, Address);
        outs() << getReversedLocWithContext(
            symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
      }
      outs() << "\n";
    }

    if (Disassembled) {
      const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());

      // Record instruction size.
      AddressToInstSizeMap[Address] = Size;

      // Populate address maps.
      CodeAddressVec.push_back(Address);
      if (MCDesc.isCall()) {
        CallAddressSet.insert(Address);
        UncondBranchAddrSet.insert(Address);
      } else if (MCDesc.isReturn()) {
        RetAddressSet.insert(Address);
        UncondBranchAddrSet.insert(Address);
      } else if (MCDesc.isBranch()) {
        if (MCDesc.isUnconditionalBranch())
          UncondBranchAddrSet.insert(Address);
        BranchAddressSet.insert(Address);
      }

      // Record potential call targets for tail frame inference later-on.
      if (InferMissingFrames && FRange) {
        // Target stays 0 when evaluateBranch() does not assign it.
        uint64_t Target = 0;
        MIA->evaluateBranch(Inst, Address, Size, Target);
        if (MCDesc.isCall()) {
          // Indirect call targets are unknown at this point. Recording the
          // unknown target (zero) for further LBR-based refinement.
          MissingContextInferrer->CallEdges[Address].insert(Target);
        } else if (MCDesc.isUnconditionalBranch()) {
          assert(Target &&
                 "target should be known for unconditional direct branch");
          // Any inter-function unconditional jump is considered tail call at
          // this point. This is not 100% accurate and could further be
          // optimized based on some source annotation.
          FuncRange *ToFRange = findFuncRange(Target);
          if (ToFRange && ToFRange->Func != FRange->Func)
            MissingContextInferrer->TailCallEdges[Address].insert(Target);
          // The debug dump below runs for every unconditional branch, not
          // only those recorded as tail calls.
          LLVM_DEBUG({
            dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address);
            IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
            dbgs() << "\n";
          });
        } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) {
          // This is an indirect branch but not necessarily an indirect tail
          // call. The isBarrier check is to filter out conditional branch.
          // Similar with indirect call targets, recording the unknown target
          // (zero) for further LBR-based refinement.
          MissingContextInferrer->TailCallEdges[Address].insert(Target);
          LLVM_DEBUG({
            dbgs() << "Indirect Tail call: "
                   << format("%8" PRIx64 ":", Address);
            IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
            dbgs() << "\n";
          });
        }
      }

      // A valid instruction ends any pending run of invalid bytes; report it.
      if (InvalidInstLength) {
        WarnInvalidInsts(Address - InvalidInstLength, Address - 1);
        InvalidInstLength = 0;
      }
    } else {
      InvalidInstLength += Size;
    }

    Address += Size;
  }

  // Report a run of invalid bytes that extends to the end of the symbol.
  if (InvalidInstLength)
    WarnInvalidInsts(Address - InvalidInstLength, Address - 1);

  if (ShowDisassembly)
    outs() << "\n";

  return true;
}
599
setUpDisassembler(const ELFObjectFileBase * Obj)600 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
601 const Target *TheTarget = getTarget(Obj);
602 std::string TripleName = TheTriple.getTriple();
603 StringRef FileName = Obj->getFileName();
604
605 MRI.reset(TheTarget->createMCRegInfo(TripleName));
606 if (!MRI)
607 exitWithError("no register info for target " + TripleName, FileName);
608
609 MCTargetOptions MCOptions;
610 AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
611 if (!AsmInfo)
612 exitWithError("no assembly info for target " + TripleName, FileName);
613
614 Expected<SubtargetFeatures> Features = Obj->getFeatures();
615 if (!Features)
616 exitWithError(Features.takeError(), FileName);
617 STI.reset(
618 TheTarget->createMCSubtargetInfo(TripleName, "", Features->getString()));
619 if (!STI)
620 exitWithError("no subtarget info for target " + TripleName, FileName);
621
622 MII.reset(TheTarget->createMCInstrInfo());
623 if (!MII)
624 exitWithError("no instruction info for target " + TripleName, FileName);
625
626 MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
627 std::unique_ptr<MCObjectFileInfo> MOFI(
628 TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
629 Ctx.setObjectFileInfo(MOFI.get());
630 DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
631 if (!DisAsm)
632 exitWithError("no disassembler for target " + TripleName, FileName);
633
634 MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
635
636 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
637 IPrinter.reset(TheTarget->createMCInstPrinter(
638 Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
639 IPrinter->setPrintBranchImmAsAddress(true);
640 }
641
disassemble(const ELFObjectFileBase * Obj)642 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
643 // Set up disassembler and related components.
644 setUpDisassembler(Obj);
645
646 // Create a mapping from virtual address to symbol name. The symbols in text
647 // sections are the candidates to dissassemble.
648 std::map<SectionRef, SectionSymbolsTy> AllSymbols;
649 StringRef FileName = Obj->getFileName();
650 for (const SymbolRef &Symbol : Obj->symbols()) {
651 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
652 const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
653 section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
654 if (SecI != Obj->section_end())
655 AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
656 }
657
658 // Sort all the symbols. Use a stable sort to stabilize the output.
659 for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
660 stable_sort(SecSyms.second);
661
662 assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
663 "Functions to disassemble should be only specified together with "
664 "--show-disassembly-only");
665
666 if (ShowDisassemblyOnly)
667 outs() << "\nDisassembly of " << FileName << ":\n";
668
669 // Dissassemble a text section.
670 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
671 SI != SE; ++SI) {
672 const SectionRef &Section = *SI;
673 if (!Section.isText())
674 continue;
675
676 uint64_t ImageLoadAddr = getPreferredBaseAddress();
677 uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr;
678 uint64_t SectSize = Section.getSize();
679 if (!SectSize)
680 continue;
681
682 // Register the text section.
683 TextSections.insert({SectionAddress, SectSize});
684
685 StringRef SectionName = unwrapOrError(Section.getName(), FileName);
686
687 if (ShowDisassemblyOnly) {
688 outs() << "\nDisassembly of section " << SectionName;
689 outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
690 << format("0x%" PRIx64, Section.getAddress() + SectSize)
691 << "]:\n\n";
692 }
693
694 if (SectionName == ".plt")
695 continue;
696
697 // Get the section data.
698 ArrayRef<uint8_t> Bytes =
699 arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
700
701 // Get the list of all the symbols in this section.
702 SectionSymbolsTy &Symbols = AllSymbols[Section];
703
704 // Disassemble symbol by symbol.
705 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
706 if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
707 exitWithError("disassembling error", FileName);
708 }
709 }
710
711 // Dissassemble rodata section to check if FS discriminator symbol exists.
712 checkUseFSDiscriminator(Obj, AllSymbols);
713 }
714
checkUseFSDiscriminator(const ELFObjectFileBase * Obj,std::map<SectionRef,SectionSymbolsTy> & AllSymbols)715 void ProfiledBinary::checkUseFSDiscriminator(
716 const ELFObjectFileBase *Obj,
717 std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
718 const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
719 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
720 SI != SE; ++SI) {
721 const SectionRef &Section = *SI;
722 if (!Section.isData() || Section.getSize() == 0)
723 continue;
724 SectionSymbolsTy &Symbols = AllSymbols[Section];
725
726 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
727 if (Symbols[SI].Name == FSDiscriminatorVar) {
728 UseFSDiscriminator = true;
729 return;
730 }
731 }
732 }
733 }
734
populateElfSymbolAddressList(const ELFObjectFileBase * Obj)735 void ProfiledBinary::populateElfSymbolAddressList(
736 const ELFObjectFileBase *Obj) {
737 // Create a mapping from virtual address to symbol GUID and the other way
738 // around.
739 StringRef FileName = Obj->getFileName();
740 for (const SymbolRef &Symbol : Obj->symbols()) {
741 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
742 const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
743 uint64_t GUID = Function::getGUID(Name);
744 SymbolStartAddrs[GUID] = Addr;
745 StartAddrToSymMap.emplace(Addr, GUID);
746 }
747 }
748
loadSymbolsFromDWARFUnit(DWARFUnit & CompilationUnit)749 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
750 for (const auto &DieInfo : CompilationUnit.dies()) {
751 llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
752
753 if (!Die.isSubprogramDIE())
754 continue;
755 auto Name = Die.getName(llvm::DINameKind::LinkageName);
756 if (!Name)
757 Name = Die.getName(llvm::DINameKind::ShortName);
758 if (!Name)
759 continue;
760
761 auto RangesOrError = Die.getAddressRanges();
762 if (!RangesOrError)
763 continue;
764 const DWARFAddressRangesVector &Ranges = RangesOrError.get();
765
766 if (Ranges.empty())
767 continue;
768
769 // Different DWARF symbols can have same function name, search or create
770 // BinaryFunction indexed by the name.
771 auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
772 auto &Func = Ret.first->second;
773 if (Ret.second)
774 Func.FuncName = Ret.first->first;
775
776 for (const auto &Range : Ranges) {
777 uint64_t StartAddress = Range.LowPC;
778 uint64_t EndAddress = Range.HighPC;
779
780 if (EndAddress <= StartAddress ||
781 StartAddress < getPreferredBaseAddress())
782 continue;
783
784 // We may want to know all ranges for one function. Here group the
785 // ranges and store them into BinaryFunction.
786 Func.Ranges.emplace_back(StartAddress, EndAddress);
787
788 auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange());
789 if (R.second) {
790 FuncRange &FRange = R.first->second;
791 FRange.Func = &Func;
792 FRange.StartAddress = StartAddress;
793 FRange.EndAddress = EndAddress;
794 } else {
795 WithColor::warning()
796 << "Duplicated symbol start address at "
797 << format("%8" PRIx64, StartAddress) << " "
798 << R.first->second.getFuncName() << " and " << Name << "\n";
799 }
800 }
801 }
802 }
803
loadSymbolsFromDWARF(ObjectFile & Obj)804 void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
805 auto DebugContext = llvm::DWARFContext::create(
806 Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath);
807 if (!DebugContext)
808 exitWithError("Error creating the debug info context", Path);
809
810 for (const auto &CompilationUnit : DebugContext->compile_units())
811 loadSymbolsFromDWARFUnit(*CompilationUnit.get());
812
813 // Handles DWO sections that can either be in .o, .dwo or .dwp files.
814 for (const auto &CompilationUnit : DebugContext->compile_units()) {
815 DWARFUnit *const DwarfUnit = CompilationUnit.get();
816 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
817 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
818 if (!DWOCU->isDWOUnit()) {
819 std::string DWOName = dwarf::toString(
820 DwarfUnit->getUnitDIE().find(
821 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
822 "");
823 WithColor::warning()
824 << "DWO debug information for " << DWOName
825 << " was not loaded. Please check the .o, .dwo or .dwp path.\n";
826 continue;
827 }
828 loadSymbolsFromDWARFUnit(*DWOCU);
829 }
830 }
831
832 if (BinaryFunctions.empty())
833 WithColor::warning() << "Loading of DWARF info completed, but no binary "
834 "functions have been retrieved.\n";
835 }
836
populateSymbolListFromDWARF(ProfileSymbolList & SymbolList)837 void ProfiledBinary::populateSymbolListFromDWARF(
838 ProfileSymbolList &SymbolList) {
839 for (auto &I : StartAddrToFuncRangeMap)
840 SymbolList.add(I.second.getFuncName());
841 }
842
setupSymbolizer()843 void ProfiledBinary::setupSymbolizer() {
844 symbolize::LLVMSymbolizer::Options SymbolizerOpts;
845 SymbolizerOpts.PrintFunctions =
846 DILineInfoSpecifier::FunctionNameKind::LinkageName;
847 SymbolizerOpts.Demangle = false;
848 SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
849 SymbolizerOpts.UseSymbolTable = false;
850 SymbolizerOpts.RelativeAddresses = false;
851 SymbolizerOpts.DWPName = DWPPath;
852 Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
853 }
854
symbolize(const InstructionPointer & IP,bool UseCanonicalFnName,bool UseProbeDiscriminator)855 SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
856 bool UseCanonicalFnName,
857 bool UseProbeDiscriminator) {
858 assert(this == IP.Binary &&
859 "Binary should only symbolize its own instruction");
860 auto Addr = object::SectionedAddress{IP.Address,
861 object::SectionedAddress::UndefSection};
862 DIInliningInfo InlineStack = unwrapOrError(
863 Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
864 SymbolizerPath);
865
866 SampleContextFrameVector CallStack;
867 for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
868 const auto &CallerFrame = InlineStack.getFrame(I);
869 if (CallerFrame.FunctionName == "<invalid>")
870 break;
871
872 StringRef FunctionName(CallerFrame.FunctionName);
873 if (UseCanonicalFnName)
874 FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
875
876 uint32_t Discriminator = CallerFrame.Discriminator;
877 uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
878 if (UseProbeDiscriminator) {
879 LineOffset =
880 PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
881 Discriminator = 0;
882 }
883
884 LineLocation Line(LineOffset, Discriminator);
885 auto It = NameStrings.insert(FunctionName.str());
886 CallStack.emplace_back(*It.first, Line);
887 }
888
889 return CallStack;
890 }
891
computeInlinedContextSizeForRange(uint64_t RangeBegin,uint64_t RangeEnd)892 void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin,
893 uint64_t RangeEnd) {
894 InstructionPointer IP(this, RangeBegin, true);
895
896 if (IP.Address != RangeBegin)
897 WithColor::warning() << "Invalid start instruction at "
898 << format("%8" PRIx64, RangeBegin) << "\n";
899
900 if (IP.Address >= RangeEnd)
901 return;
902
903 do {
904 const SampleContextFrameVector SymbolizedCallStack =
905 getFrameLocationStack(IP.Address, UsePseudoProbes);
906 uint64_t Size = AddressToInstSizeMap[IP.Address];
907 // Record instruction size for the corresponding context
908 FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
909
910 } while (IP.advance() && IP.Address < RangeEnd);
911 }
912
computeInlinedContextSizeForFunc(const BinaryFunction * Func)913 void ProfiledBinary::computeInlinedContextSizeForFunc(
914 const BinaryFunction *Func) {
915 // Note that a function can be spilt into multiple ranges, so compute for all
916 // ranges of the function.
917 for (const auto &Range : Func->Ranges)
918 computeInlinedContextSizeForRange(Range.first, Range.second);
919
920 // Track optimized-away inlinee for probed binary. A function inlined and then
921 // optimized away should still have their probes left over in places.
922 if (usePseudoProbes()) {
923 auto I = TopLevelProbeFrameMap.find(Func->FuncName);
924 if (I != TopLevelProbeFrameMap.end()) {
925 BinarySizeContextTracker::ProbeFrameStack ProbeContext;
926 FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
927 ProbeContext);
928 }
929 }
930 }
931
inferMissingFrames(const SmallVectorImpl<uint64_t> & Context,SmallVectorImpl<uint64_t> & NewContext)932 void ProfiledBinary::inferMissingFrames(
933 const SmallVectorImpl<uint64_t> &Context,
934 SmallVectorImpl<uint64_t> &NewContext) {
935 MissingContextInferrer->inferMissingFrames(Context, NewContext);
936 }
937
InstructionPointer(const ProfiledBinary * Binary,uint64_t Address,bool RoundToNext)938 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
939 uint64_t Address, bool RoundToNext)
940 : Binary(Binary), Address(Address) {
941 Index = Binary->getIndexForAddr(Address);
942 if (RoundToNext) {
943 // we might get address which is not the code
944 // it should round to the next valid address
945 if (Index >= Binary->getCodeAddrVecSize())
946 this->Address = UINT64_MAX;
947 else
948 this->Address = Binary->getAddressforIndex(Index);
949 }
950 }
951
advance()952 bool InstructionPointer::advance() {
953 Index++;
954 if (Index >= Binary->getCodeAddrVecSize()) {
955 Address = UINT64_MAX;
956 return false;
957 }
958 Address = Binary->getAddressforIndex(Index);
959 return true;
960 }
961
backward()962 bool InstructionPointer::backward() {
963 if (Index == 0) {
964 Address = 0;
965 return false;
966 }
967 Index--;
968 Address = Binary->getAddressforIndex(Index);
969 return true;
970 }
971
update(uint64_t Addr)972 void InstructionPointer::update(uint64_t Addr) {
973 Address = Addr;
974 Index = Binary->getIndexForAddr(Address);
975 }
976
977 } // end namespace sampleprof
978 } // end namespace llvm
979