1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfiledBinary.h"
10 #include "ErrorHandling.h"
11 #include "ProfileGenerator.h"
12 #include "llvm/ADT/Triple.h"
13 #include "llvm/Demangle/Demangle.h"
14 #include "llvm/IR/DebugInfoMetadata.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/Format.h"
17 #include "llvm/Support/TargetRegistry.h"
18 #include "llvm/Support/TargetSelect.h"
19 
20 #define DEBUG_TYPE "load-binary"
21 
22 using namespace llvm;
23 using namespace sampleprof;
24 
25 cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only", cl::ReallyHidden,
26                                   cl::init(false), cl::ZeroOrMore,
27                                   cl::desc("Print disassembled code."));
28 
29 cl::opt<bool> ShowSourceLocations("show-source-locations", cl::ReallyHidden,
30                                   cl::init(false), cl::ZeroOrMore,
basic_try_future_combinators()31                                   cl::desc("Print source locations."));
32 
33 cl::opt<bool> ShowCanonicalFnName("show-canonical-fname", cl::ReallyHidden,
34                                   cl::init(false), cl::ZeroOrMore,
35                                   cl::desc("Print canonical function name."));
36 
37 cl::opt<bool> ShowPseudoProbe(
38     "show-pseudo-probe", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore,
39     cl::desc("Print pseudo probe section and disassembled info."));
40 
41 namespace llvm {
42 namespace sampleprof {
43 
44 static const Target *getTarget(const ObjectFile *Obj) {
45   Triple TheTriple = Obj->makeTriple();
46   std::string Error;
47   std::string ArchName;
48   const Target *TheTarget =
49       TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
50   if (!TheTarget)
51     exitWithError(Error, Obj->getFileName());
52   return TheTarget;
53 }
54 
55 void ProfiledBinary::load() {
56   // Attempt to open the binary.
57   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
58   Binary &Binary = *OBinary.getBinary();
59 
60   auto *Obj = dyn_cast<ELFObjectFileBase>(&Binary);
61   if (!Obj)
62     exitWithError("not a valid Elf image", Path);
63 
64   TheTriple = Obj->makeTriple();
65   // Current only support X86
66   if (!TheTriple.isX86())
67     exitWithError("unsupported target", TheTriple.getTriple());
68   LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
69 
70   // Find the preferred load address for text sections.
71   setPreferredTextSegmentAddresses(Obj);
72 
73   // Decode pseudo probe related section
74   decodePseudoProbe(Obj);
75 
76   // Disassemble the text sections.
77   disassemble(Obj);
78 
79   // Use function start and return address to infer prolog and epilog
80   ProEpilogTracker.inferPrologOffsets(FuncStartAddrMap);
81   ProEpilogTracker.inferEpilogOffsets(RetAddrs);
82 
83   // TODO: decode other sections.
84 }
85 
86 bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
87                                         uint64_t Address2) const {
88   uint64_t Offset1 = virtualAddrToOffset(Address1);
89   uint64_t Offset2 = virtualAddrToOffset(Address2);
90   const FrameLocationStack &Context1 = getFrameLocationStack(Offset1);
91   const FrameLocationStack &Context2 = getFrameLocationStack(Offset2);
92   if (Context1.size() != Context2.size())
93     return false;
94   if (Context1.empty())
95     return false;
96   // The leaf frame contains location within the leaf, and it
97   // needs to be remove that as it's not part of the calling context
98   return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
99                     Context2.begin(), Context2.begin() + Context2.size() - 1);
100 }
101 
102 std::string
103 ProfiledBinary::getExpandedContextStr(const SmallVectorImpl<uint64_t> &Stack,
104                                       bool &WasLeafInlined) const {
105   std::string ContextStr;
106   SmallVector<std::string, 16> ContextVec;
107   // Process from frame root to leaf
108   for (auto Address : Stack) {
109     uint64_t Offset = virtualAddrToOffset(Address);
110     const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
111     // An instruction without a valid debug line will be ignored by sample
112     // processing
113     if (ExpandedContext.empty())
114       return std::string();
115     // Set WasLeafInlined to the size of inlined frame count for the last
116     // address which is leaf
117     WasLeafInlined = (ExpandedContext.size() > 1);
118     for (const auto &Loc : ExpandedContext) {
119       ContextVec.push_back(getCallSite(Loc));
120     }
121   }
122 
123   assert(ContextVec.size() && "Context length should be at least 1");
124   // Compress the context string except for the leaf frame
125   std::string LeafFrame = ContextVec.back();
126   ContextVec.pop_back();
127   CSProfileGenerator::compressRecursionContext<std::string>(ContextVec);
128 
129   std::ostringstream OContextStr;
130   for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) {
131     if (OContextStr.str().size()) {
132       OContextStr << " @ ";
133     }
134     OContextStr << ContextVec[I];
135   }
136   // Only keep the function name for the leaf frame
137   if (OContextStr.str().size())
138     OContextStr << " @ ";
139   OContextStr << StringRef(LeafFrame).split(":").first.str();
140   return OContextStr.str();
141 }
142 
143 template <class ELFT>
144 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName) {
145   const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
146   for (const typename ELFT::Phdr &Phdr : PhdrRange) {
147     if ((Phdr.p_type == ELF::PT_LOAD) && (Phdr.p_flags & ELF::PF_X)) {
148         // Segments will always be loaded at a page boundary.
149         PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr & ~(Phdr.p_align - 1U));
150         TextSegmentOffsets.push_back(Phdr.p_offset & ~(Phdr.p_align - 1U));
151       }
152   }
153 
154   if (PreferredTextSegmentAddresses.empty())
155     exitWithError("no executable segment found", FileName);
156 }
157 
158 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) {
159   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
160     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
161   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
162     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
163   else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
164     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
165   else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
166     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
167   else
168     llvm_unreachable("invalid ELF object format");
169 }
170 
171 void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
172   StringRef FileName = Obj->getFileName();
173   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
174        SI != SE; ++SI) {
175     const SectionRef &Section = *SI;
176     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
177 
178     if (SectionName == ".pseudo_probe_desc") {
179       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
180       ProbeDecoder.buildGUID2FuncDescMap(
181           reinterpret_cast<const uint8_t *>(Contents.data()), Contents.size());
182     } else if (SectionName == ".pseudo_probe") {
183       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
184       ProbeDecoder.buildAddress2ProbeMap(
185           reinterpret_cast<const uint8_t *>(Contents.data()), Contents.size());
186       // set UsePseudoProbes flag, used for PerfReader
187       UsePseudoProbes = true;
188     }
189   }
190 
191   if (ShowPseudoProbe)
192     ProbeDecoder.printGUID2FuncDescMap(outs());
193 }
194 
195 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
196                                         SectionSymbolsTy &Symbols,
197                                         const SectionRef &Section) {
198   std::size_t SE = Symbols.size();
199   uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress();
200   uint64_t SectSize = Section.getSize();
201   uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress();
202   uint64_t EndOffset = (SI + 1 < SE)
203                            ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
204                            : SectionOffset + SectSize;
205   if (StartOffset >= EndOffset)
206     return true;
207 
208   StringRef SymbolName =
209       ShowCanonicalFnName
210           ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
211           : Symbols[SI].Name;
212   if (ShowDisassemblyOnly)
213     outs() << '<' << SymbolName << ">:\n";
214 
215   auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
216     WithColor::warning() << "Invalid instructions at "
217                          << format("%8" PRIx64, Start) << " - "
218                          << format("%8" PRIx64, End) << "\n";
219   };
220 
221   uint64_t Offset = StartOffset;
222   // Size of a consecutive invalid instruction range starting from Offset -1
223   // backwards.
224   uint64_t InvalidInstLength = 0;
225   while (Offset < EndOffset) {
226     MCInst Inst;
227     uint64_t Size;
228     // Disassemble an instruction.
229     bool Disassembled =
230         DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
231                                Offset + getPreferredBaseAddress(), nulls());
232     if (Size == 0)
233       Size = 1;
234 
235     if (ShowDisassemblyOnly) {
236       if (ShowPseudoProbe) {
237         ProbeDecoder.printProbeForAddress(outs(),
238                                           Offset + getPreferredBaseAddress());
239       }
240       outs() << format("%8" PRIx64 ":", Offset + getPreferredBaseAddress());
241       size_t Start = outs().tell();
242       if (Disassembled)
243         IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
244       else
245         outs() << "\t<unknown>";
246       if (ShowSourceLocations) {
247         unsigned Cur = outs().tell() - Start;
248         if (Cur < 40)
249           outs().indent(40 - Cur);
250         InstructionPointer IP(this, Offset);
251         outs() << getReversedLocWithContext(symbolize(IP, ShowCanonicalFnName));
252       }
253       outs() << "\n";
254     }
255 
256     if (Disassembled) {
257       const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
258       // Populate a vector of the symbolized callsite at this location
259       // We don't need symbolized info for probe-based profile, just use an
260       // empty stack as an entry to indicate a valid binary offset
261       FrameLocationStack SymbolizedCallStack;
262       if (!UsePseudoProbes) {
263         InstructionPointer IP(this, Offset);
264         SymbolizedCallStack = symbolize(IP, true);
265       }
266       Offset2LocStackMap[Offset] = SymbolizedCallStack;
267       // Populate address maps.
268       CodeAddrs.push_back(Offset);
269       if (MCDesc.isCall())
270         CallAddrs.insert(Offset);
271       else if (MCDesc.isReturn())
272         RetAddrs.insert(Offset);
273 
274       if (InvalidInstLength) {
275         WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
276         InvalidInstLength = 0;
277       }
278     } else {
279       InvalidInstLength += Size;
280     }
281 
282     Offset += Size;
283   }
284 
285   if (InvalidInstLength)
286     WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
287 
288   if (ShowDisassemblyOnly)
289     outs() << "\n";
290 
291   FuncStartAddrMap[StartOffset] = Symbols[SI].Name.str();
292   return true;
293 }
294 
295 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
296   const Target *TheTarget = getTarget(Obj);
297   std::string TripleName = TheTriple.getTriple();
298   StringRef FileName = Obj->getFileName();
299 
300   MRI.reset(TheTarget->createMCRegInfo(TripleName));
301   if (!MRI)
302     exitWithError("no register info for target " + TripleName, FileName);
303 
304   MCTargetOptions MCOptions;
305   AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
306   if (!AsmInfo)
307     exitWithError("no assembly info for target " + TripleName, FileName);
308 
309   SubtargetFeatures Features = Obj->getFeatures();
310   STI.reset(
311       TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
312   if (!STI)
313     exitWithError("no subtarget info for target " + TripleName, FileName);
314 
315   MII.reset(TheTarget->createMCInstrInfo());
316   if (!MII)
317     exitWithError("no instruction info for target " + TripleName, FileName);
318 
319   MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
320   std::unique_ptr<MCObjectFileInfo> MOFI(
321       TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
322   Ctx.setObjectFileInfo(MOFI.get());
323   DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
324   if (!DisAsm)
325     exitWithError("no disassembler for target " + TripleName, FileName);
326 
327   MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
328 
329   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
330   IPrinter.reset(TheTarget->createMCInstPrinter(
331       Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
332   IPrinter->setPrintBranchImmAsAddress(true);
333 }
334 
335 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
336   // Set up disassembler and related components.
337   setUpDisassembler(Obj);
338 
339   // Create a mapping from virtual address to symbol name. The symbols in text
340   // sections are the candidates to dissassemble.
341   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
342   StringRef FileName = Obj->getFileName();
343   for (const SymbolRef &Symbol : Obj->symbols()) {
344     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
345     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
346     section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
347     if (SecI != Obj->section_end())
348       AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
349   }
350 
351   // Sort all the symbols. Use a stable sort to stabilize the output.
352   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
353     stable_sort(SecSyms.second);
354 
355   if (ShowDisassemblyOnly)
356     outs() << "\nDisassembly of " << FileName << ":\n";
357 
358   // Dissassemble a text section.
359   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
360        SI != SE; ++SI) {
361     const SectionRef &Section = *SI;
362     if (!Section.isText())
363       continue;
364 
365     uint64_t ImageLoadAddr = getPreferredBaseAddress();
366     uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr;
367     uint64_t SectSize = Section.getSize();
368     if (!SectSize)
369       continue;
370 
371     // Register the text section.
372     TextSections.insert({SectionOffset, SectSize});
373 
374     if (ShowDisassemblyOnly) {
375       StringRef SectionName = unwrapOrError(Section.getName(), FileName);
376       outs() << "\nDisassembly of section " << SectionName;
377       outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
378              << format("0x%" PRIx64, Section.getAddress() + SectSize)
379              << "]:\n\n";
380     }
381 
382     // Get the section data.
383     ArrayRef<uint8_t> Bytes =
384         arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
385 
386     // Get the list of all the symbols in this section.
387     SectionSymbolsTy &Symbols = AllSymbols[Section];
388 
389     // Disassemble symbol by symbol.
390     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
391       if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
392         exitWithError("disassembling error", FileName);
393     }
394   }
395 }
396 
397 void ProfiledBinary::setupSymbolizer() {
398   symbolize::LLVMSymbolizer::Options SymbolizerOpts;
399   SymbolizerOpts.PrintFunctions =
400       DILineInfoSpecifier::FunctionNameKind::LinkageName;
401   SymbolizerOpts.Demangle = false;
402   SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
403   SymbolizerOpts.UseSymbolTable = false;
404   SymbolizerOpts.RelativeAddresses = false;
405   Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
406 }
407 
408 FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP,
409                                              bool UseCanonicalFnName) {
410   assert(this == IP.Binary &&
411          "Binary should only symbolize its own instruction");
412   auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(),
413                                        object::SectionedAddress::UndefSection};
414   DIInliningInfo InlineStack =
415       unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName());
416 
417   FrameLocationStack CallStack;
418 
419   for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
420     const auto &CallerFrame = InlineStack.getFrame(I);
421     if (CallerFrame.FunctionName == "<invalid>")
422       break;
423     StringRef FunctionName(CallerFrame.FunctionName);
424     if (UseCanonicalFnName)
425       FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
426     LineLocation Line(CallerFrame.Line - CallerFrame.StartLine,
427                       DILocation::getBaseDiscriminatorFromDiscriminator(
428                           CallerFrame.Discriminator,
429                           /* IsFSDiscriminator */ false));
430     FrameLocation Callsite(FunctionName.str(), Line);
431     CallStack.push_back(Callsite);
432   }
433 
434   return CallStack;
435 }
436 
437 InstructionPointer::InstructionPointer(ProfiledBinary *Binary, uint64_t Address,
438                                        bool RoundToNext)
439     : Binary(Binary), Address(Address) {
440   Index = Binary->getIndexForAddr(Address);
441   if (RoundToNext) {
442     // we might get address which is not the code
443     // it should round to the next valid address
444     this->Address = Binary->getAddressforIndex(Index);
445   }
446 }
447 
448 void InstructionPointer::advance() {
449   Index++;
450   Address = Binary->getAddressforIndex(Index);
451 }
452 
453 void InstructionPointer::backward() {
454   Index--;
455   Address = Binary->getAddressforIndex(Index);
456 }
457 
458 void InstructionPointer::update(uint64_t Addr) {
459   Address = Addr;
460   Index = Binary->getIndexForAddr(Address);
461 }
462 
463 } // end namespace sampleprof
464 } // end namespace llvm
465