1 //===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
11 
12 #include "CallContext.h"
13 #include "llvm/ADT/Optional.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
16 #include "llvm/MC/MCAsmInfo.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstPrinter.h"
21 #include "llvm/MC/MCInstrAnalysis.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCObjectFileInfo.h"
24 #include "llvm/MC/MCPseudoProbe.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/MC/MCTargetOptions.h"
28 #include "llvm/Object/ELFObjectFile.h"
29 #include "llvm/ProfileData/SampleProf.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Transforms/IPO/SampleContextTracker.h"
33 #include <list>
34 #include <map>
35 #include <set>
36 #include <sstream>
37 #include <string>
38 #include <unordered_map>
39 #include <unordered_set>
40 #include <vector>
41 
42 extern cl::opt<bool> EnableCSPreInliner;
43 extern cl::opt<bool> UseContextCostForPreInliner;
44 
45 using namespace llvm;
46 using namespace sampleprof;
47 using namespace llvm::object;
48 
49 namespace llvm {
50 namespace sampleprof {
51 
52 class ProfiledBinary;
53 
// A cursor over the binary's disassembled instruction stream. It tracks a
// position both as a raw offset/address and as an index into the binary's
// sorted code address array, which allows stepping to the previous or next
// instruction. Member functions are defined out of line.
struct InstructionPointer {
  // The binary being traversed. Not owned.
  const ProfiledBinary *Binary;
  union {
    // Offset of the executable segment of the binary.
    uint64_t Offset = 0;
    // Also used as address in unwinder
    uint64_t Address;
  };
  // Index to the sorted code address array of the binary.
  uint64_t Index = 0;
  // NOTE(review): RoundToNext presumably rounds a non-instruction address up
  // to the next valid instruction start — confirm against the definition.
  InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
                     bool RoundToNext = false);
  // Step forward to the next instruction in the sorted code address array.
  void advance();
  // Step back to the previous instruction in the sorted code address array.
  void backward();
  // Reposition the pointer at the given address.
  void update(uint64_t Addr);
};
70 
71 // PrologEpilog offset tracker, used to filter out broken stack samples
72 // Currently we use a heuristic size (two) to infer prolog and epilog
73 // based on the start address and return address. In the future,
74 // we will switch to Dwarf CFI based tracker
75 struct PrologEpilogTracker {
76   // A set of prolog and epilog offsets. Used by virtual unwinding.
77   std::unordered_set<uint64_t> PrologEpilogSet;
78   ProfiledBinary *Binary;
PrologEpilogTrackerPrologEpilogTracker79   PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){};
80 
81   // Take the two addresses from the start of function as prolog
inferPrologOffsetsPrologEpilogTracker82   void inferPrologOffsets(std::map<uint64_t, std::pair<std::string, uint64_t>>
83                               &FuncStartOffsetMap) {
84     for (auto I : FuncStartOffsetMap) {
85       PrologEpilogSet.insert(I.first);
86       InstructionPointer IP(Binary, I.first);
87       IP.advance();
88       PrologEpilogSet.insert(IP.Offset);
89     }
90   }
91 
92   // Take the last two addresses before the return address as epilog
inferEpilogOffsetsPrologEpilogTracker93   void inferEpilogOffsets(std::unordered_set<uint64_t> &RetAddrs) {
94     for (auto Addr : RetAddrs) {
95       PrologEpilogSet.insert(Addr);
96       InstructionPointer IP(Binary, Addr);
97       IP.backward();
98       PrologEpilogSet.insert(IP.Offset);
99     }
100   }
101 };
102 
// Track function byte size under different context (outlined version as well as
// various inlined versions). It also provides query support to get function
// size with the best matching context, which is used to help pre-inliner use
// accurate post-optimization size to make decisions.
// TODO: If an inlinee is completely optimized away, ideally we should have zero
// for its context size, currently we would miss such context since it doesn't
// have instructions. To fix this, we need to mark all inlinee with entry probe
// but without instructions as having zero size.
class BinarySizeContextTracker {
public:
  // Add instruction with given size to a context
  void addInstructionForContext(const SampleContextFrameVector &Context,
                                uint32_t InstrSize);

  // Get function size with a specific context. When there's no exact match
  // for the given context, try to retrieve the size of that function from
  // closest matching context.
  uint32_t getFuncSizeForContext(const SampleContext &Context);

  // For inlinees that are fully optimized away, we can establish zero size
  // using their remaining probes.
  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);

  // Debugging helper: print the context trie rooted at RootContext.
  void dump() { RootContext.dumpTree(); }

private:
  // A stack of (function name, probe count/index) frames describing an
  // inline context while walking the decoded probe inline tree.
  using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
  // Recursive worker for the public trackInlineesOptimizedAway above.
  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
                              MCDecodedPseudoProbeInlineTree &ProbeNode,
                              ProbeFrameStack &Context);

  // Root node for context trie tree, note that this is a reverse context trie
  // with callee as parent and caller as child. This way we can traverse from
  // root to find the best/longest matching context if an exact match does not
  // exist. It gives us the best possible estimate for function's post-inline,
  // post-optimization byte size.
  ContextTrieNode RootContext;
};
141 
142 using OffsetRange = std::pair<uint64_t, uint64_t>;
143 
144 class ProfiledBinary {
145   // Absolute path of the binary.
146   std::string Path;
147   // The target triple.
148   Triple TheTriple;
149   // The runtime base address that the first executable segment is loaded at.
150   uint64_t BaseAddress = 0;
151   // The preferred load address of each executable segment.
152   std::vector<uint64_t> PreferredTextSegmentAddresses;
153   // The file offset of each executable segment.
154   std::vector<uint64_t> TextSegmentOffsets;
155 
156   // Mutiple MC component info
157   std::unique_ptr<const MCRegisterInfo> MRI;
158   std::unique_ptr<const MCAsmInfo> AsmInfo;
159   std::unique_ptr<const MCSubtargetInfo> STI;
160   std::unique_ptr<const MCInstrInfo> MII;
161   std::unique_ptr<MCDisassembler> DisAsm;
162   std::unique_ptr<const MCInstrAnalysis> MIA;
163   std::unique_ptr<MCInstPrinter> IPrinter;
164   // A list of text sections sorted by start RVA and size. Used to check
165   // if a given RVA is a valid code address.
166   std::set<std::pair<uint64_t, uint64_t>> TextSections;
167   // An ordered map of mapping function's start offset to its name and
168   // end offset.
169   std::map<uint64_t, std::pair<std::string, uint64_t>> FuncStartOffsetMap;
170   // Offset to context location map. Used to expand the context.
171   std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
172 
173   // Offset to instruction size map. Also used for quick offset lookup.
174   std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
175 
176   // An array of offsets of all instructions sorted in increasing order. The
177   // sorting is needed to fast advance to the next forward/backward instruction.
178   std::vector<uint64_t> CodeAddrOffsets;
179   // A set of call instruction offsets. Used by virtual unwinding.
180   std::unordered_set<uint64_t> CallAddrs;
181   // A set of return instruction offsets. Used by virtual unwinding.
182   std::unordered_set<uint64_t> RetAddrs;
183 
184   // Estimate and track function prolog and epilog ranges.
185   PrologEpilogTracker ProEpilogTracker;
186 
187   // Track function sizes under different context
188   BinarySizeContextTracker FuncSizeTracker;
189 
190   // The symbolizer used to get inline context for an instruction.
191   std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
192 
193   // String table owning function name strings created from the symbolizer.
194   std::unordered_set<std::string> NameStrings;
195 
196   // A collection of functions to print disassembly for.
197   StringSet<> DisassembleFunctionSet;
198 
199   // Pseudo probe decoder
200   MCPseudoProbeDecoder ProbeDecoder;
201 
202   bool UsePseudoProbes = false;
203 
204   // Whether we need to symbolize all instructions to get function context size.
205   bool TrackFuncContextSize = false;
206 
207   // Indicate if the base loading address is parsed from the mmap event or uses
208   // the preferred address
209   bool IsLoadedByMMap = false;
210   // Use to avoid redundant warning.
211   bool MissingMMapWarned = false;
212 
213   void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);
214 
215   template <class ELFT>
216   void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName);
217 
218   void decodePseudoProbe(const ELFObjectFileBase *Obj);
219 
220   // Set up disassembler and related components.
221   void setUpDisassembler(const ELFObjectFileBase *Obj);
222   void setupSymbolizer();
223 
224   /// Dissassemble the text section and build various address maps.
225   void disassemble(const ELFObjectFileBase *O);
226 
227   /// Helper function to dissassemble the symbol and extract info for unwinding
228   bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
229                           SectionSymbolsTy &Symbols, const SectionRef &Section);
230   /// Symbolize a given instruction pointer and return a full call context.
231   SampleContextFrameVector symbolize(const InstructionPointer &IP,
232                                      bool UseCanonicalFnName = false,
233                                      bool UseProbeDiscriminator = false);
234   /// Decode the interesting parts of the binary and build internal data
235   /// structures. On high level, the parts of interest are:
236   ///   1. Text sections, including the main code section and the PLT
237   ///   entries that will be used to handle cross-module call transitions.
238   ///   2. The .debug_line section, used by Dwarf-based profile generation.
239   ///   3. Pseudo probe related sections, used by probe-based profile
240   ///   generation.
241   void load();
242 
243 public:
ProfiledBinary(const StringRef Path)244   ProfiledBinary(const StringRef Path)
245       : Path(Path), ProEpilogTracker(this),
246         TrackFuncContextSize(EnableCSPreInliner &&
247                              UseContextCostForPreInliner) {
248     setupSymbolizer();
249     load();
250   }
virtualAddrToOffset(uint64_t VirtualAddress)251   uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
252     return VirtualAddress - BaseAddress;
253   }
offsetToVirtualAddr(uint64_t Offset)254   uint64_t offsetToVirtualAddr(uint64_t Offset) const {
255     return Offset + BaseAddress;
256   }
getPath()257   StringRef getPath() const { return Path; }
getName()258   StringRef getName() const { return llvm::sys::path::filename(Path); }
getBaseAddress()259   uint64_t getBaseAddress() const { return BaseAddress; }
setBaseAddress(uint64_t Address)260   void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
261 
262   // Return the preferred load address for the first executable segment.
getPreferredBaseAddress()263   uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; }
264   // Return the file offset for the first executable segment.
getTextSegmentOffset()265   uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; }
getPreferredTextSegmentAddresses()266   const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const {
267     return PreferredTextSegmentAddresses;
268   }
getTextSegmentOffsets()269   const std::vector<uint64_t> &getTextSegmentOffsets() const {
270     return TextSegmentOffsets;
271   }
272 
addressIsCode(uint64_t Address)273   bool addressIsCode(uint64_t Address) const {
274     uint64_t Offset = virtualAddrToOffset(Address);
275     return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
276   }
addressIsCall(uint64_t Address)277   bool addressIsCall(uint64_t Address) const {
278     uint64_t Offset = virtualAddrToOffset(Address);
279     return CallAddrs.count(Offset);
280   }
addressIsReturn(uint64_t Address)281   bool addressIsReturn(uint64_t Address) const {
282     uint64_t Offset = virtualAddrToOffset(Address);
283     return RetAddrs.count(Offset);
284   }
addressInPrologEpilog(uint64_t Address)285   bool addressInPrologEpilog(uint64_t Address) const {
286     uint64_t Offset = virtualAddrToOffset(Address);
287     return ProEpilogTracker.PrologEpilogSet.count(Offset);
288   }
289 
getAddressforIndex(uint64_t Index)290   uint64_t getAddressforIndex(uint64_t Index) const {
291     return offsetToVirtualAddr(CodeAddrOffsets[Index]);
292   }
293 
usePseudoProbes()294   bool usePseudoProbes() const { return UsePseudoProbes; }
295   // Get the index in CodeAddrOffsets for the address
296   // As we might get an address which is not the code
297   // here it would round to the next valid code address by
298   // using lower bound operation
getIndexForOffset(uint64_t Offset)299   uint32_t getIndexForOffset(uint64_t Offset) const {
300     auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
301     return Low - CodeAddrOffsets.begin();
302   }
getIndexForAddr(uint64_t Address)303   uint32_t getIndexForAddr(uint64_t Address) const {
304     uint64_t Offset = virtualAddrToOffset(Address);
305     return getIndexForOffset(Offset);
306   }
307 
getCallAddrFromFrameAddr(uint64_t FrameAddr)308   uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
309     auto I = getIndexForAddr(FrameAddr);
310     FrameAddr = I ? getAddressforIndex(I - 1) : 0;
311     if (FrameAddr && addressIsCall(FrameAddr))
312       return FrameAddr;
313     return 0;
314   }
315 
getFuncFromStartOffset(uint64_t Offset)316   StringRef getFuncFromStartOffset(uint64_t Offset) {
317     auto I = FuncStartOffsetMap.find(Offset);
318     if (I == FuncStartOffsetMap.end())
319       return StringRef();
320     return I->second.first;
321   }
322 
findFuncOffsetRange(uint64_t Offset)323   OffsetRange findFuncOffsetRange(uint64_t Offset) {
324     auto I = FuncStartOffsetMap.upper_bound(Offset);
325     if (I == FuncStartOffsetMap.begin())
326       return {0, 0};
327     I--;
328     return {I->first, I->second.second};
329   }
330 
getFuncSizeForContext(SampleContext & Context)331   uint32_t getFuncSizeForContext(SampleContext &Context) {
332     return FuncSizeTracker.getFuncSizeForContext(Context);
333   }
334 
335   const SampleContextFrameVector &
336   getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
337     auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
338     if (I.second) {
339       InstructionPointer IP(this, Offset);
340       I.first->second = symbolize(IP, true, UseProbeDiscriminator);
341     }
342     return I.first->second;
343   }
344 
getInlineLeafFrameLoc(uint64_t Offset)345   Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
346     const auto &Stack = getFrameLocationStack(Offset);
347     if (Stack.empty())
348       return {};
349     return Stack.back();
350   }
351 
352   // Compare two addresses' inline context
353   bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
354 
355   // Get the full context of the current stack with inline context filled in.
356   // It will search the disassembling info stored in Offset2LocStackMap. This is
357   // used as the key of function sample map
358   SampleContextFrameVector
359   getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
360                      bool &WasLeafInlined);
361   // Go through instructions among the given range and record its size for the
362   // inline context.
363   void computeInlinedContextSizeForRange(uint64_t StartOffset,
364                                          uint64_t EndOffset);
365 
getCallProbeForAddr(uint64_t Address)366   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
367     return ProbeDecoder.getCallProbeForAddr(Address);
368   }
369 
370   void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe,
371                                 SampleContextFrameVector &InlineContextStack,
372                                 bool IncludeLeaf = false) const {
373     SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext;
374     ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext,
375                                           IncludeLeaf);
376     for (auto &Callsite : ProbeInlineContext) {
377       InlineContextStack.emplace_back(Callsite.first,
378                                       LineLocation(Callsite.second, 0));
379     }
380   }
getAddress2ProbesMap()381   const AddressProbesMap &getAddress2ProbesMap() const {
382     return ProbeDecoder.getAddress2ProbesMap();
383   }
getFuncDescForGUID(uint64_t GUID)384   const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
385     return ProbeDecoder.getFuncDescForGUID(GUID);
386   }
387 
388   const MCPseudoProbeFuncDesc *
getInlinerDescForProbe(const MCDecodedPseudoProbe * Probe)389   getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) {
390     return ProbeDecoder.getInlinerDescForProbe(Probe);
391   }
392 
getTrackFuncContextSize()393   bool getTrackFuncContextSize() { return TrackFuncContextSize; }
394 
getIsLoadedByMMap()395   bool getIsLoadedByMMap() { return IsLoadedByMMap; }
396 
setIsLoadedByMMap(bool Value)397   void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
398 
getMissingMMapWarned()399   bool getMissingMMapWarned() { return MissingMMapWarned; }
400 
setMissingMMapWarned(bool Value)401   void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; }
402 };
403 
404 } // end namespace sampleprof
405 } // end namespace llvm
406 
407 #endif
408