1 //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the declaration of the MCPseudoProbe to support the pseudo 10 // probe encoding for AutoFDO. Pseudo probes together with their inline context 11 // are encoded in a DFS recursive way in the .pseudoprobe sections. For each 12 // .pseudoprobe section, the encoded binary data consist of a single or mutiple 13 // function records each for one outlined function. A function record has the 14 // following format : 15 // 16 // FUNCTION BODY (one for each outlined function present in the text section) 17 // GUID (uint64) 18 // GUID of the function 19 // NPROBES (ULEB128) 20 // Number of probes originating from this function. 21 // NUM_INLINED_FUNCTIONS (ULEB128) 22 // Number of callees inlined into this function, aka number of 23 // first-level inlinees 24 // PROBE RECORDS 25 // A list of NPROBES entries. Each entry contains: 26 // INDEX (ULEB128) 27 // TYPE (uint4) 28 // 0 - block probe, 1 - indirect call, 2 - direct call 29 // ATTRIBUTE (uint3) 30 // 1 - reserved 31 // ADDRESS_TYPE (uint1) 32 // 0 - code address, 1 - address delta 33 // CODE_ADDRESS (uint64 or ULEB128) 34 // code address or address delta, depending on ADDRESS_TYPE 35 // INLINED FUNCTION RECORDS 36 // A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined 37 // callees. Each record contains: 38 // INLINE SITE 39 // ID of the callsite probe (ULEB128) 40 // FUNCTION BODY 41 // A FUNCTION BODY entry describing the inlined function. 42 //===----------------------------------------------------------------------===// 43 44 #ifndef LLVM_MC_MCPSEUDOPROBE_H 45 #define LLVM_MC_MCPSEUDOPROBE_H 46 47 #include "llvm/ADT/SmallVector.h" 48 #include "llvm/ADT/StringRef.h" 49 #include "llvm/IR/PseudoProbe.h" 50 #include "llvm/Support/ErrorOr.h" 51 #include <list> 52 #include <map> 53 #include <memory> 54 #include <string> 55 #include <tuple> 56 #include <type_traits> 57 #include <unordered_map> 58 #include <vector> 59 60 namespace llvm { 61 62 class MCSection; 63 class MCStreamer; 64 class MCSymbol; 65 class MCObjectStreamer; 66 class raw_ostream; 67 68 enum class MCPseudoProbeFlag { 69 // If set, indicates that the probe is encoded as an address delta 70 // instead of a real code address. 71 AddressDelta = 0x1, 72 }; 73 74 // Function descriptor decoded from .pseudo_probe_desc section 75 struct MCPseudoProbeFuncDesc { 76 uint64_t FuncGUID = 0; 77 uint64_t FuncHash = 0; 78 std::string FuncName; 79 MCPseudoProbeFuncDescMCPseudoProbeFuncDesc80 MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) 81 : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; 82 83 void print(raw_ostream &OS); 84 }; 85 86 class MCPseudoProbe; 87 class MCDecodedPseudoProbe; 88 89 // An inline frame has the form <Guid, ProbeID> 90 using InlineSite = std::tuple<uint64_t, uint32_t>; 91 using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; 92 // GUID to PseudoProbeFuncDesc map 93 using GUIDProbeFunctionMap = 94 std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>; 95 // Address to pseudo probes map. 96 using AddressProbesMap = 97 std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>; 98 99 class MCPseudoProbeInlineTree; 100 class MCDecodedPseudoProbeInlineTree; 101 102 class MCPseudoProbeBase { 103 protected: 104 uint64_t Guid; 105 uint64_t Index; 106 uint8_t Attributes; 107 uint8_t Type; 108 // The value should be equal to PseudoProbeReservedId::Last + 1 which is 109 // defined in SampleProfileProbe.h. The header file is not included here to 110 // reduce the dependency from MC to IPO. 111 const static uint32_t PseudoProbeFirstId = 1; 112 113 public: MCPseudoProbeBase(uint64_t G,uint64_t I,uint64_t At,uint8_t T)114 MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T) 115 : Guid(G), Index(I), Attributes(At), Type(T) {} 116 isEntry()117 bool isEntry() const { return Index == PseudoProbeFirstId; } 118 getGuid()119 uint64_t getGuid() const { return Guid; } 120 getIndex()121 uint64_t getIndex() const { return Index; } 122 getAttributes()123 uint8_t getAttributes() const { return Attributes; } 124 getType()125 uint8_t getType() const { return Type; } 126 isBlock()127 bool isBlock() const { 128 return Type == static_cast<uint8_t>(PseudoProbeType::Block); 129 } 130 isIndirectCall()131 bool isIndirectCall() const { 132 return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall); 133 } 134 isDirectCall()135 bool isDirectCall() const { 136 return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall); 137 } 138 isCall()139 bool isCall() const { return isIndirectCall() || isDirectCall(); } 140 setAttributes(uint8_t Attr)141 void setAttributes(uint8_t Attr) { Attributes = Attr; } 142 }; 143 144 /// Instances of this class represent a pseudo probe instance for a pseudo probe 145 /// table entry, which is created during a machine instruction is assembled and 146 /// uses an address from a temporary label created at the current address in the 147 /// current section. 148 class MCPseudoProbe : public MCPseudoProbeBase { 149 MCSymbol *Label; 150 151 public: MCPseudoProbe(MCSymbol * Label,uint64_t Guid,uint64_t Index,uint64_t Type,uint64_t Attributes)152 MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, 153 uint64_t Attributes) 154 : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) { 155 assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); 156 assert(Attributes <= 0xFF && 157 "Probe attributes too big to encode, exceeding 2^16"); 158 } 159 getLabel()160 MCSymbol *getLabel() const { return Label; } 161 void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; 162 }; 163 164 // Represents a callsite with caller function name and probe id 165 using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>; 166 167 class MCDecodedPseudoProbe : public MCPseudoProbeBase { 168 uint64_t Address; 169 MCDecodedPseudoProbeInlineTree *InlineTree; 170 171 public: MCDecodedPseudoProbe(uint64_t Ad,uint64_t G,uint32_t I,PseudoProbeType K,uint8_t At,MCDecodedPseudoProbeInlineTree * Tree)172 MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, 173 uint8_t At, MCDecodedPseudoProbeInlineTree *Tree) 174 : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad), 175 InlineTree(Tree){}; 176 getAddress()177 uint64_t getAddress() const { return Address; } 178 setAddress(uint64_t Addr)179 void setAddress(uint64_t Addr) { Address = Addr; } 180 getInlineTreeNode()181 MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const { 182 return InlineTree; 183 } 184 185 // Get the inlined context by traversing current inline tree backwards, 186 // each tree node has its InlineSite which is taken as the context. 187 // \p ContextStack is populated in root to leaf order 188 void 189 getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack, 190 const GUIDProbeFunctionMap &GUID2FuncMAP) const; 191 192 // Helper function to get the string from context stack 193 std::string 194 getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const; 195 196 // Print pseudo probe while disassembling 197 void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP, 198 bool ShowName) const; 199 }; 200 201 template <typename ProbeType, typename DerivedProbeInlineTreeType> 202 class MCPseudoProbeInlineTreeBase { 203 struct InlineSiteHash { operatorInlineSiteHash204 uint64_t operator()(const InlineSite &Site) const { 205 return std::get<0>(Site) ^ std::get<1>(Site); 206 } 207 }; 208 209 protected: 210 // Track children (e.g. inlinees) of current context 211 using InlinedProbeTreeMap = std::unordered_map< 212 InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>; 213 InlinedProbeTreeMap Children; 214 // Set of probes that come with the function. 215 std::vector<ProbeType> Probes; MCPseudoProbeInlineTreeBase()216 MCPseudoProbeInlineTreeBase() { 217 static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase, 218 DerivedProbeInlineTreeType>::value, 219 "DerivedProbeInlineTreeType must be subclass of " 220 "MCPseudoProbeInlineTreeBase"); 221 } 222 223 public: 224 uint64_t Guid = 0; 225 226 // Root node has a GUID 0. isRoot()227 bool isRoot() const { return Guid == 0; } getChildren()228 InlinedProbeTreeMap &getChildren() { return Children; } getChildren()229 const InlinedProbeTreeMap &getChildren() const { return Children; } getProbes()230 std::vector<ProbeType> &getProbes() { return Probes; } addProbes(ProbeType Probe)231 void addProbes(ProbeType Probe) { Probes.push_back(Probe); } 232 // Caller node of the inline site 233 MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent; getOrAddNode(const InlineSite & Site)234 DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { 235 auto Ret = Children.emplace( 236 Site, std::make_unique<DerivedProbeInlineTreeType>(Site)); 237 Ret.first->second->Parent = this; 238 return Ret.first->second.get(); 239 }; 240 }; 241 242 // A Tri-tree based data structure to group probes by inline stack. 243 // A tree is allocated for a standalone .text section. A fake 244 // instance is created as the root of a tree. 245 // A real instance of this class is created for each function, either a 246 // not inlined function that has code in .text section or an inlined function. 247 class MCPseudoProbeInlineTree 248 : public MCPseudoProbeInlineTreeBase<MCPseudoProbe, 249 MCPseudoProbeInlineTree> { 250 public: 251 MCPseudoProbeInlineTree() = default; MCPseudoProbeInlineTree(uint64_t Guid)252 MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } MCPseudoProbeInlineTree(const InlineSite & Site)253 MCPseudoProbeInlineTree(const InlineSite &Site) { 254 this->Guid = std::get<0>(Site); 255 } 256 257 // MCPseudoProbeInlineTree method based on Inlinees 258 void addPseudoProbe(const MCPseudoProbe &Probe, 259 const MCPseudoProbeInlineStack &InlineStack); 260 void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe); 261 }; 262 263 // inline tree node for the decoded pseudo probe 264 class MCDecodedPseudoProbeInlineTree 265 : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *, 266 MCDecodedPseudoProbeInlineTree> { 267 public: 268 InlineSite ISite; 269 // Used for decoding 270 uint32_t ChildrenToProcess = 0; 271 MCDecodedPseudoProbeInlineTree()272 MCDecodedPseudoProbeInlineTree(){}; MCDecodedPseudoProbeInlineTree(const InlineSite & Site)273 MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; 274 275 // Return false if it's a dummy inline site hasInlineSite()276 bool hasInlineSite() const { return std::get<0>(ISite) != 0; } 277 }; 278 279 /// Instances of this class represent the pseudo probes inserted into a compile 280 /// unit. 281 class MCPseudoProbeSection { 282 public: addPseudoProbe(MCSection * Sec,const MCPseudoProbe & Probe,const MCPseudoProbeInlineStack & InlineStack)283 void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe, 284 const MCPseudoProbeInlineStack &InlineStack) { 285 MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack); 286 } 287 288 // TODO: Sort by getOrdinal to ensure a determinstic section order 289 using MCProbeDivisionMap = std::map<MCSection *, MCPseudoProbeInlineTree>; 290 291 private: 292 // A collection of MCPseudoProbe for each text section. The MCPseudoProbes 293 // are grouped by GUID of the functions where they are from and will be 294 // encoded by groups. In the comdat scenario where a text section really only 295 // contains the code of a function solely, the probes associated with a comdat 296 // function are still grouped by GUIDs due to inlining that can bring probes 297 // from different functions into one function. 298 MCProbeDivisionMap MCProbeDivisions; 299 300 public: getMCProbes()301 const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; } 302 empty()303 bool empty() const { return MCProbeDivisions.empty(); } 304 305 void emit(MCObjectStreamer *MCOS); 306 }; 307 308 class MCPseudoProbeTable { 309 // A collection of MCPseudoProbe in the current module grouped by text 310 // sections. MCPseudoProbes will be encoded into a corresponding 311 // .pseudoprobe section. With functions emitted as separate comdats, 312 // a text section really only contains the code of a function solely, and the 313 // probes associated with the text section will be emitted into a standalone 314 // .pseudoprobe section that shares the same comdat group with the function. 315 MCPseudoProbeSection MCProbeSections; 316 317 public: 318 static void emit(MCObjectStreamer *MCOS); 319 getProbeSections()320 MCPseudoProbeSection &getProbeSections() { return MCProbeSections; } 321 322 #ifndef NDEBUG 323 static int DdgPrintIndent; 324 #endif 325 }; 326 327 class MCPseudoProbeDecoder { 328 // GUID to PseudoProbeFuncDesc map. 329 GUIDProbeFunctionMap GUID2FuncDescMap; 330 331 // Address to probes map. 332 AddressProbesMap Address2ProbesMap; 333 334 // The dummy root of the inline trie, all the outlined function will directly 335 // be the children of the dummy root, all the inlined function will be the 336 // children of its inlineer. So the relation would be like: 337 // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2 338 MCDecodedPseudoProbeInlineTree DummyInlineRoot; 339 340 /// Points to the current location in the buffer. 341 const uint8_t *Data = nullptr; 342 343 /// Points to the end of the buffer. 344 const uint8_t *End = nullptr; 345 346 // Decoding helper function 347 template <typename T> ErrorOr<T> readUnencodedNumber(); 348 template <typename T> ErrorOr<T> readUnsignedNumber(); 349 template <typename T> ErrorOr<T> readSignedNumber(); 350 ErrorOr<StringRef> readString(uint32_t Size); 351 352 public: 353 // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. 354 bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); 355 356 // Decode pseudo_probe section to build address to probes map. 357 bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); 358 359 // Print pseudo_probe_desc section info 360 void printGUID2FuncDescMap(raw_ostream &OS); 361 362 // Print pseudo_probe section info, used along with show-disassembly 363 void printProbeForAddress(raw_ostream &OS, uint64_t Address); 364 365 // do printProbeForAddress for all addresses 366 void printProbesForAllAddresses(raw_ostream &OS); 367 368 // Look up the probe of a call for the input address 369 const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const; 370 371 const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const; 372 373 // Helper function to populate one probe's inline stack into 374 // \p InlineContextStack. 375 // Current leaf location info will be added if IncludeLeaf is true 376 // Example: 377 // Current probe(bar:3) inlined at foo:2 then inlined at main:1 378 // IncludeLeaf = true, Output: [main:1, foo:2, bar:3] 379 // IncludeLeaf = false, Output: [main:1, foo:2] 380 void getInlineContextForProbe( 381 const MCDecodedPseudoProbe *Probe, 382 SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack, 383 bool IncludeLeaf) const; 384 getAddress2ProbesMap()385 const AddressProbesMap &getAddress2ProbesMap() const { 386 return Address2ProbesMap; 387 } 388 getAddress2ProbesMap()389 AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; } 390 getGUID2FuncDescMap()391 const GUIDProbeFunctionMap &getGUID2FuncDescMap() const { 392 return GUID2FuncDescMap; 393 } 394 395 const MCPseudoProbeFuncDesc * 396 getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const; 397 getDummyInlineRoot()398 const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { 399 return DummyInlineRoot; 400 } 401 }; 402 403 } // end namespace llvm 404 405 #endif // LLVM_MC_MCPSEUDOPROBE_H 406