1 //===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H 10 #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H 11 12 #include "CallContext.h" 13 #include "llvm/ADT/Optional.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 16 #include "llvm/MC/MCAsmInfo.h" 17 #include "llvm/MC/MCContext.h" 18 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 19 #include "llvm/MC/MCInst.h" 20 #include "llvm/MC/MCInstPrinter.h" 21 #include "llvm/MC/MCInstrAnalysis.h" 22 #include "llvm/MC/MCInstrInfo.h" 23 #include "llvm/MC/MCObjectFileInfo.h" 24 #include "llvm/MC/MCPseudoProbe.h" 25 #include "llvm/MC/MCRegisterInfo.h" 26 #include "llvm/MC/MCSubtargetInfo.h" 27 #include "llvm/MC/MCTargetOptions.h" 28 #include "llvm/Object/ELFObjectFile.h" 29 #include "llvm/ProfileData/SampleProf.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Path.h" 32 #include "llvm/Transforms/IPO/SampleContextTracker.h" 33 #include <list> 34 #include <map> 35 #include <set> 36 #include <sstream> 37 #include <string> 38 #include <unordered_map> 39 #include <unordered_set> 40 #include <vector> 41 42 extern cl::opt<bool> EnableCSPreInliner; 43 extern cl::opt<bool> UseContextCostForPreInliner; 44 45 using namespace llvm; 46 using namespace sampleprof; 47 using namespace llvm::object; 48 49 namespace llvm { 50 namespace sampleprof { 51 52 class ProfiledBinary; 53 54 struct InstructionPointer { 55 const ProfiledBinary *Binary; 56 union { 57 // Offset of the executable segment of the binary. 58 uint64_t Offset = 0; 59 // Also used as address in unwinder 60 uint64_t Address; 61 }; 62 // Index to the sorted code address array of the binary. 63 uint64_t Index = 0; 64 InstructionPointer(const ProfiledBinary *Binary, uint64_t Address, 65 bool RoundToNext = false); 66 void advance(); 67 void backward(); 68 void update(uint64_t Addr); 69 }; 70 71 // PrologEpilog offset tracker, used to filter out broken stack samples 72 // Currently we use a heuristic size (two) to infer prolog and epilog 73 // based on the start address and return address. In the future, 74 // we will switch to Dwarf CFI based tracker 75 struct PrologEpilogTracker { 76 // A set of prolog and epilog offsets. Used by virtual unwinding. 77 std::unordered_set<uint64_t> PrologEpilogSet; 78 ProfiledBinary *Binary; PrologEpilogTrackerPrologEpilogTracker79 PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; 80 81 // Take the two addresses from the start of function as prolog inferPrologOffsetsPrologEpilogTracker82 void inferPrologOffsets(std::map<uint64_t, std::pair<std::string, uint64_t>> 83 &FuncStartOffsetMap) { 84 for (auto I : FuncStartOffsetMap) { 85 PrologEpilogSet.insert(I.first); 86 InstructionPointer IP(Binary, I.first); 87 IP.advance(); 88 PrologEpilogSet.insert(IP.Offset); 89 } 90 } 91 92 // Take the last two addresses before the return address as epilog inferEpilogOffsetsPrologEpilogTracker93 void inferEpilogOffsets(std::unordered_set<uint64_t> &RetAddrs) { 94 for (auto Addr : RetAddrs) { 95 PrologEpilogSet.insert(Addr); 96 InstructionPointer IP(Binary, Addr); 97 IP.backward(); 98 PrologEpilogSet.insert(IP.Offset); 99 } 100 } 101 }; 102 103 // Track function byte size under different context (outlined version as well as 104 // various inlined versions). It also provides query support to get function 105 // size with the best matching context, which is used to help pre-inliner use 106 // accurate post-optimization size to make decisions. 107 // TODO: If an inlinee is completely optimized away, ideally we should have zero 108 // for its context size, currently we would misss such context since it doesn't 109 // have instructions. To fix this, we need to mark all inlinee with entry probe 110 // but without instructions as having zero size. 111 class BinarySizeContextTracker { 112 public: 113 // Add instruction with given size to a context 114 void addInstructionForContext(const SampleContextFrameVector &Context, 115 uint32_t InstrSize); 116 117 // Get function size with a specific context. When there's no exact match 118 // for the given context, try to retrieve the size of that function from 119 // closest matching context. 120 uint32_t getFuncSizeForContext(const SampleContext &Context); 121 122 // For inlinees that are full optimized away, we can establish zero size using 123 // their remaining probes. 124 void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder); 125 dump()126 void dump() { RootContext.dumpTree(); } 127 128 private: 129 using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>; 130 void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder, 131 MCDecodedPseudoProbeInlineTree &ProbeNode, 132 ProbeFrameStack &Context); 133 134 // Root node for context trie tree, node that this is a reverse context trie 135 // with callee as parent and caller as child. This way we can traverse from 136 // root to find the best/longest matching context if an exact match does not 137 // exist. It gives us the best possible estimate for function's post-inline, 138 // post-optimization byte size. 139 ContextTrieNode RootContext; 140 }; 141 142 using OffsetRange = std::pair<uint64_t, uint64_t>; 143 144 class ProfiledBinary { 145 // Absolute path of the binary. 146 std::string Path; 147 // The target triple. 148 Triple TheTriple; 149 // The runtime base address that the first executable segment is loaded at. 150 uint64_t BaseAddress = 0; 151 // The preferred load address of each executable segment. 152 std::vector<uint64_t> PreferredTextSegmentAddresses; 153 // The file offset of each executable segment. 154 std::vector<uint64_t> TextSegmentOffsets; 155 156 // Mutiple MC component info 157 std::unique_ptr<const MCRegisterInfo> MRI; 158 std::unique_ptr<const MCAsmInfo> AsmInfo; 159 std::unique_ptr<const MCSubtargetInfo> STI; 160 std::unique_ptr<const MCInstrInfo> MII; 161 std::unique_ptr<MCDisassembler> DisAsm; 162 std::unique_ptr<const MCInstrAnalysis> MIA; 163 std::unique_ptr<MCInstPrinter> IPrinter; 164 // A list of text sections sorted by start RVA and size. Used to check 165 // if a given RVA is a valid code address. 166 std::set<std::pair<uint64_t, uint64_t>> TextSections; 167 // An ordered map of mapping function's start offset to its name and 168 // end offset. 169 std::map<uint64_t, std::pair<std::string, uint64_t>> FuncStartOffsetMap; 170 // Offset to context location map. Used to expand the context. 171 std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap; 172 173 // Offset to instruction size map. Also used for quick offset lookup. 174 std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap; 175 176 // An array of offsets of all instructions sorted in increasing order. The 177 // sorting is needed to fast advance to the next forward/backward instruction. 178 std::vector<uint64_t> CodeAddrOffsets; 179 // A set of call instruction offsets. Used by virtual unwinding. 180 std::unordered_set<uint64_t> CallAddrs; 181 // A set of return instruction offsets. Used by virtual unwinding. 182 std::unordered_set<uint64_t> RetAddrs; 183 184 // Estimate and track function prolog and epilog ranges. 185 PrologEpilogTracker ProEpilogTracker; 186 187 // Track function sizes under different context 188 BinarySizeContextTracker FuncSizeTracker; 189 190 // The symbolizer used to get inline context for an instruction. 191 std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer; 192 193 // String table owning function name strings created from the symbolizer. 194 std::unordered_set<std::string> NameStrings; 195 196 // A collection of functions to print disassembly for. 197 StringSet<> DisassembleFunctionSet; 198 199 // Pseudo probe decoder 200 MCPseudoProbeDecoder ProbeDecoder; 201 202 bool UsePseudoProbes = false; 203 204 // Whether we need to symbolize all instructions to get function context size. 205 bool TrackFuncContextSize = false; 206 207 // Indicate if the base loading address is parsed from the mmap event or uses 208 // the preferred address 209 bool IsLoadedByMMap = false; 210 // Use to avoid redundant warning. 211 bool MissingMMapWarned = false; 212 213 void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O); 214 215 template <class ELFT> 216 void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName); 217 218 void decodePseudoProbe(const ELFObjectFileBase *Obj); 219 220 // Set up disassembler and related components. 221 void setUpDisassembler(const ELFObjectFileBase *Obj); 222 void setupSymbolizer(); 223 224 /// Dissassemble the text section and build various address maps. 225 void disassemble(const ELFObjectFileBase *O); 226 227 /// Helper function to dissassemble the symbol and extract info for unwinding 228 bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, 229 SectionSymbolsTy &Symbols, const SectionRef &Section); 230 /// Symbolize a given instruction pointer and return a full call context. 231 SampleContextFrameVector symbolize(const InstructionPointer &IP, 232 bool UseCanonicalFnName = false, 233 bool UseProbeDiscriminator = false); 234 /// Decode the interesting parts of the binary and build internal data 235 /// structures. On high level, the parts of interest are: 236 /// 1. Text sections, including the main code section and the PLT 237 /// entries that will be used to handle cross-module call transitions. 238 /// 2. The .debug_line section, used by Dwarf-based profile generation. 239 /// 3. Pseudo probe related sections, used by probe-based profile 240 /// generation. 241 void load(); 242 243 public: ProfiledBinary(const StringRef Path)244 ProfiledBinary(const StringRef Path) 245 : Path(Path), ProEpilogTracker(this), 246 TrackFuncContextSize(EnableCSPreInliner && 247 UseContextCostForPreInliner) { 248 setupSymbolizer(); 249 load(); 250 } virtualAddrToOffset(uint64_t VirtualAddress)251 uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const { 252 return VirtualAddress - BaseAddress; 253 } offsetToVirtualAddr(uint64_t Offset)254 uint64_t offsetToVirtualAddr(uint64_t Offset) const { 255 return Offset + BaseAddress; 256 } getPath()257 StringRef getPath() const { return Path; } getName()258 StringRef getName() const { return llvm::sys::path::filename(Path); } getBaseAddress()259 uint64_t getBaseAddress() const { return BaseAddress; } setBaseAddress(uint64_t Address)260 void setBaseAddress(uint64_t Address) { BaseAddress = Address; } 261 262 // Return the preferred load address for the first executable segment. getPreferredBaseAddress()263 uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; } 264 // Return the file offset for the first executable segment. getTextSegmentOffset()265 uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; } getPreferredTextSegmentAddresses()266 const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const { 267 return PreferredTextSegmentAddresses; 268 } getTextSegmentOffsets()269 const std::vector<uint64_t> &getTextSegmentOffsets() const { 270 return TextSegmentOffsets; 271 } 272 addressIsCode(uint64_t Address)273 bool addressIsCode(uint64_t Address) const { 274 uint64_t Offset = virtualAddrToOffset(Address); 275 return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end(); 276 } addressIsCall(uint64_t Address)277 bool addressIsCall(uint64_t Address) const { 278 uint64_t Offset = virtualAddrToOffset(Address); 279 return CallAddrs.count(Offset); 280 } addressIsReturn(uint64_t Address)281 bool addressIsReturn(uint64_t Address) const { 282 uint64_t Offset = virtualAddrToOffset(Address); 283 return RetAddrs.count(Offset); 284 } addressInPrologEpilog(uint64_t Address)285 bool addressInPrologEpilog(uint64_t Address) const { 286 uint64_t Offset = virtualAddrToOffset(Address); 287 return ProEpilogTracker.PrologEpilogSet.count(Offset); 288 } 289 getAddressforIndex(uint64_t Index)290 uint64_t getAddressforIndex(uint64_t Index) const { 291 return offsetToVirtualAddr(CodeAddrOffsets[Index]); 292 } 293 usePseudoProbes()294 bool usePseudoProbes() const { return UsePseudoProbes; } 295 // Get the index in CodeAddrOffsets for the address 296 // As we might get an address which is not the code 297 // here it would round to the next valid code address by 298 // using lower bound operation getIndexForOffset(uint64_t Offset)299 uint32_t getIndexForOffset(uint64_t Offset) const { 300 auto Low = llvm::lower_bound(CodeAddrOffsets, Offset); 301 return Low - CodeAddrOffsets.begin(); 302 } getIndexForAddr(uint64_t Address)303 uint32_t getIndexForAddr(uint64_t Address) const { 304 uint64_t Offset = virtualAddrToOffset(Address); 305 return getIndexForOffset(Offset); 306 } 307 getCallAddrFromFrameAddr(uint64_t FrameAddr)308 uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const { 309 auto I = getIndexForAddr(FrameAddr); 310 FrameAddr = I ? getAddressforIndex(I - 1) : 0; 311 if (FrameAddr && addressIsCall(FrameAddr)) 312 return FrameAddr; 313 return 0; 314 } 315 getFuncFromStartOffset(uint64_t Offset)316 StringRef getFuncFromStartOffset(uint64_t Offset) { 317 auto I = FuncStartOffsetMap.find(Offset); 318 if (I == FuncStartOffsetMap.end()) 319 return StringRef(); 320 return I->second.first; 321 } 322 findFuncOffsetRange(uint64_t Offset)323 OffsetRange findFuncOffsetRange(uint64_t Offset) { 324 auto I = FuncStartOffsetMap.upper_bound(Offset); 325 if (I == FuncStartOffsetMap.begin()) 326 return {0, 0}; 327 I--; 328 return {I->first, I->second.second}; 329 } 330 getFuncSizeForContext(SampleContext & Context)331 uint32_t getFuncSizeForContext(SampleContext &Context) { 332 return FuncSizeTracker.getFuncSizeForContext(Context); 333 } 334 335 const SampleContextFrameVector & 336 getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) { 337 auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector()); 338 if (I.second) { 339 InstructionPointer IP(this, Offset); 340 I.first->second = symbolize(IP, true, UseProbeDiscriminator); 341 } 342 return I.first->second; 343 } 344 getInlineLeafFrameLoc(uint64_t Offset)345 Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) { 346 const auto &Stack = getFrameLocationStack(Offset); 347 if (Stack.empty()) 348 return {}; 349 return Stack.back(); 350 } 351 352 // Compare two addresses' inline context 353 bool inlineContextEqual(uint64_t Add1, uint64_t Add2); 354 355 // Get the full context of the current stack with inline context filled in. 356 // It will search the disassembling info stored in Offset2LocStackMap. This is 357 // used as the key of function sample map 358 SampleContextFrameVector 359 getExpandedContext(const SmallVectorImpl<uint64_t> &Stack, 360 bool &WasLeafInlined); 361 // Go through instructions among the given range and record its size for the 362 // inline context. 363 void computeInlinedContextSizeForRange(uint64_t StartOffset, 364 uint64_t EndOffset); 365 getCallProbeForAddr(uint64_t Address)366 const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const { 367 return ProbeDecoder.getCallProbeForAddr(Address); 368 } 369 370 void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, 371 SampleContextFrameVector &InlineContextStack, 372 bool IncludeLeaf = false) const { 373 SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext; 374 ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext, 375 IncludeLeaf); 376 for (auto &Callsite : ProbeInlineContext) { 377 InlineContextStack.emplace_back(Callsite.first, 378 LineLocation(Callsite.second, 0)); 379 } 380 } getAddress2ProbesMap()381 const AddressProbesMap &getAddress2ProbesMap() const { 382 return ProbeDecoder.getAddress2ProbesMap(); 383 } getFuncDescForGUID(uint64_t GUID)384 const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) { 385 return ProbeDecoder.getFuncDescForGUID(GUID); 386 } 387 388 const MCPseudoProbeFuncDesc * getInlinerDescForProbe(const MCDecodedPseudoProbe * Probe)389 getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) { 390 return ProbeDecoder.getInlinerDescForProbe(Probe); 391 } 392 getTrackFuncContextSize()393 bool getTrackFuncContextSize() { return TrackFuncContextSize; } 394 getIsLoadedByMMap()395 bool getIsLoadedByMMap() { return IsLoadedByMMap; } 396 setIsLoadedByMMap(bool Value)397 void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; } 398 getMissingMMapWarned()399 bool getMissingMMapWarned() { return MissingMMapWarned; } 400 setMissingMMapWarned(bool Value)401 void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; } 402 }; 403 404 } // end namespace sampleprof 405 } // end namespace llvm 406 407 #endif 408