1 //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the declaration of the MCPseudoProbe to support the pseudo 10 // probe encoding for AutoFDO. Pseudo probes together with their inline context 11 // are encoded in a DFS recursive way in the .pseudoprobe sections. For each 12 // .pseudoprobe section, the encoded binary data consist of a single or mutiple 13 // function records each for one outlined function. A function record has the 14 // following format : 15 // 16 // FUNCTION BODY (one for each outlined function present in the text section) 17 // GUID (uint64) 18 // GUID of the function 19 // NPROBES (ULEB128) 20 // Number of probes originating from this function. 21 // NUM_INLINED_FUNCTIONS (ULEB128) 22 // Number of callees inlined into this function, aka number of 23 // first-level inlinees 24 // PROBE RECORDS 25 // A list of NPROBES entries. Each entry contains: 26 // INDEX (ULEB128) 27 // TYPE (uint4) 28 // 0 - block probe, 1 - indirect call, 2 - direct call 29 // ATTRIBUTE (uint3) 30 // 1 - reserved 31 // ADDRESS_TYPE (uint1) 32 // 0 - code address, 1 - address delta 33 // CODE_ADDRESS (uint64 or ULEB128) 34 // code address or address delta, depending on ADDRESS_TYPE 35 // INLINED FUNCTION RECORDS 36 // A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined 37 // callees. Each record contains: 38 // INLINE SITE 39 // ID of the callsite probe (ULEB128) 40 // FUNCTION BODY 41 // A FUNCTION BODY entry describing the inlined function. 42 //===----------------------------------------------------------------------===// 43 44 #ifndef LLVM_MC_MCPSEUDOPROBE_H 45 #define LLVM_MC_MCPSEUDOPROBE_H 46 47 #include "llvm/ADT/SmallVector.h" 48 #include "llvm/ADT/StringRef.h" 49 #include "llvm/IR/PseudoProbe.h" 50 #include "llvm/Support/ErrorOr.h" 51 #include <list> 52 #include <map> 53 #include <memory> 54 #include <string> 55 #include <tuple> 56 #include <type_traits> 57 #include <unordered_map> 58 #include <unordered_set> 59 #include <vector> 60 61 namespace llvm { 62 63 class MCSection; 64 class MCSymbol; 65 class MCObjectStreamer; 66 class raw_ostream; 67 68 enum class MCPseudoProbeFlag { 69 // If set, indicates that the probe is encoded as an address delta 70 // instead of a real code address. 71 AddressDelta = 0x1, 72 }; 73 74 // Function descriptor decoded from .pseudo_probe_desc section 75 struct MCPseudoProbeFuncDesc { 76 uint64_t FuncGUID = 0; 77 uint64_t FuncHash = 0; 78 std::string FuncName; 79 80 MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) 81 : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; 82 83 void print(raw_ostream &OS); 84 }; 85 86 class MCDecodedPseudoProbe; 87 88 // An inline frame has the form <CalleeGuid, ProbeID> 89 using InlineSite = std::tuple<uint64_t, uint32_t>; 90 using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; 91 // GUID to PseudoProbeFuncDesc map 92 using GUIDProbeFunctionMap = 93 std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>; 94 // Address to pseudo probes map. 95 using AddressProbesMap = 96 std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>; 97 98 class MCDecodedPseudoProbeInlineTree; 99 100 class MCPseudoProbeBase { 101 protected: 102 uint64_t Guid; 103 uint64_t Index; 104 uint8_t Attributes; 105 uint8_t Type; 106 // The value should be equal to PseudoProbeReservedId::Last + 1 which is 107 // defined in SampleProfileProbe.h. The header file is not included here to 108 // reduce the dependency from MC to IPO. 109 const static uint32_t PseudoProbeFirstId = 1; 110 111 public: 112 MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T) 113 : Guid(G), Index(I), Attributes(At), Type(T) {} 114 115 bool isEntry() const { return Index == PseudoProbeFirstId; } 116 117 uint64_t getGuid() const { return Guid; } 118 119 uint64_t getIndex() const { return Index; } 120 121 uint8_t getAttributes() const { return Attributes; } 122 123 uint8_t getType() const { return Type; } 124 125 bool isBlock() const { 126 return Type == static_cast<uint8_t>(PseudoProbeType::Block); 127 } 128 129 bool isIndirectCall() const { 130 return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall); 131 } 132 133 bool isDirectCall() const { 134 return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall); 135 } 136 137 bool isCall() const { return isIndirectCall() || isDirectCall(); } 138 139 void setAttributes(uint8_t Attr) { Attributes = Attr; } 140 }; 141 142 /// Instances of this class represent a pseudo probe instance for a pseudo probe 143 /// table entry, which is created during a machine instruction is assembled and 144 /// uses an address from a temporary label created at the current address in the 145 /// current section. 146 class MCPseudoProbe : public MCPseudoProbeBase { 147 MCSymbol *Label; 148 149 public: 150 MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, 151 uint64_t Attributes) 152 : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) { 153 assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); 154 assert(Attributes <= 0xFF && 155 "Probe attributes too big to encode, exceeding 2^16"); 156 } 157 158 MCSymbol *getLabel() const { return Label; } 159 void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; 160 }; 161 162 // Represents a callsite with caller function name and probe id 163 using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>; 164 165 class MCDecodedPseudoProbe : public MCPseudoProbeBase { 166 uint64_t Address; 167 MCDecodedPseudoProbeInlineTree *InlineTree; 168 169 public: 170 MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, 171 uint8_t At, MCDecodedPseudoProbeInlineTree *Tree) 172 : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad), 173 InlineTree(Tree){}; 174 175 uint64_t getAddress() const { return Address; } 176 177 void setAddress(uint64_t Addr) { Address = Addr; } 178 179 MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const { 180 return InlineTree; 181 } 182 183 // Get the inlined context by traversing current inline tree backwards, 184 // each tree node has its InlineSite which is taken as the context. 185 // \p ContextStack is populated in root to leaf order 186 void 187 getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack, 188 const GUIDProbeFunctionMap &GUID2FuncMAP) const; 189 190 // Helper function to get the string from context stack 191 std::string 192 getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const; 193 194 // Print pseudo probe while disassembling 195 void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP, 196 bool ShowName) const; 197 }; 198 199 template <typename ProbeType, typename DerivedProbeInlineTreeType> 200 class MCPseudoProbeInlineTreeBase { 201 struct InlineSiteHash { 202 uint64_t operator()(const InlineSite &Site) const { 203 return std::get<0>(Site) ^ std::get<1>(Site); 204 } 205 }; 206 207 protected: 208 // Track children (e.g. inlinees) of current context 209 using InlinedProbeTreeMap = std::unordered_map< 210 InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>; 211 InlinedProbeTreeMap Children; 212 // Set of probes that come with the function. 213 std::vector<ProbeType> Probes; 214 MCPseudoProbeInlineTreeBase() { 215 static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase, 216 DerivedProbeInlineTreeType>::value, 217 "DerivedProbeInlineTreeType must be subclass of " 218 "MCPseudoProbeInlineTreeBase"); 219 } 220 221 public: 222 uint64_t Guid = 0; 223 224 // Root node has a GUID 0. 225 bool isRoot() const { return Guid == 0; } 226 InlinedProbeTreeMap &getChildren() { return Children; } 227 const InlinedProbeTreeMap &getChildren() const { return Children; } 228 std::vector<ProbeType> &getProbes() { return Probes; } 229 void addProbes(ProbeType Probe) { Probes.push_back(Probe); } 230 // Caller node of the inline site 231 MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent; 232 DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { 233 auto Ret = Children.emplace( 234 Site, std::make_unique<DerivedProbeInlineTreeType>(Site)); 235 Ret.first->second->Parent = this; 236 return Ret.first->second.get(); 237 }; 238 }; 239 240 // A Tri-tree based data structure to group probes by inline stack. 241 // A tree is allocated for a standalone .text section. A fake 242 // instance is created as the root of a tree. 243 // A real instance of this class is created for each function, either a 244 // not inlined function that has code in .text section or an inlined function. 245 class MCPseudoProbeInlineTree 246 : public MCPseudoProbeInlineTreeBase<MCPseudoProbe, 247 MCPseudoProbeInlineTree> { 248 public: 249 MCPseudoProbeInlineTree() = default; 250 MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } 251 MCPseudoProbeInlineTree(const InlineSite &Site) { 252 this->Guid = std::get<0>(Site); 253 } 254 255 // MCPseudoProbeInlineTree method based on Inlinees 256 void addPseudoProbe(const MCPseudoProbe &Probe, 257 const MCPseudoProbeInlineStack &InlineStack); 258 void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe); 259 }; 260 261 // inline tree node for the decoded pseudo probe 262 class MCDecodedPseudoProbeInlineTree 263 : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *, 264 MCDecodedPseudoProbeInlineTree> { 265 public: 266 InlineSite ISite; 267 // Used for decoding 268 uint32_t ChildrenToProcess = 0; 269 270 MCDecodedPseudoProbeInlineTree() = default; 271 MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; 272 273 // Return false if it's a dummy inline site 274 bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } 275 }; 276 277 /// Instances of this class represent the pseudo probes inserted into a compile 278 /// unit. 279 class MCPseudoProbeSection { 280 public: 281 void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe, 282 const MCPseudoProbeInlineStack &InlineStack) { 283 MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack); 284 } 285 286 // TODO: Sort by getOrdinal to ensure a determinstic section order 287 using MCProbeDivisionMap = std::map<MCSection *, MCPseudoProbeInlineTree>; 288 289 private: 290 // A collection of MCPseudoProbe for each text section. The MCPseudoProbes 291 // are grouped by GUID of the functions where they are from and will be 292 // encoded by groups. In the comdat scenario where a text section really only 293 // contains the code of a function solely, the probes associated with a comdat 294 // function are still grouped by GUIDs due to inlining that can bring probes 295 // from different functions into one function. 296 MCProbeDivisionMap MCProbeDivisions; 297 298 public: 299 const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; } 300 301 bool empty() const { return MCProbeDivisions.empty(); } 302 303 void emit(MCObjectStreamer *MCOS); 304 }; 305 306 class MCPseudoProbeTable { 307 // A collection of MCPseudoProbe in the current module grouped by text 308 // sections. MCPseudoProbes will be encoded into a corresponding 309 // .pseudoprobe section. With functions emitted as separate comdats, 310 // a text section really only contains the code of a function solely, and the 311 // probes associated with the text section will be emitted into a standalone 312 // .pseudoprobe section that shares the same comdat group with the function. 313 MCPseudoProbeSection MCProbeSections; 314 315 public: 316 static void emit(MCObjectStreamer *MCOS); 317 318 MCPseudoProbeSection &getProbeSections() { return MCProbeSections; } 319 320 #ifndef NDEBUG 321 static int DdgPrintIndent; 322 #endif 323 }; 324 325 class MCPseudoProbeDecoder { 326 // GUID to PseudoProbeFuncDesc map. 327 GUIDProbeFunctionMap GUID2FuncDescMap; 328 329 // Address to probes map. 330 AddressProbesMap Address2ProbesMap; 331 332 // The dummy root of the inline trie, all the outlined function will directly 333 // be the children of the dummy root, all the inlined function will be the 334 // children of its inlineer. So the relation would be like: 335 // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2 336 MCDecodedPseudoProbeInlineTree DummyInlineRoot; 337 338 /// Points to the current location in the buffer. 339 const uint8_t *Data = nullptr; 340 341 /// Points to the end of the buffer. 342 const uint8_t *End = nullptr; 343 344 // Decoding helper function 345 template <typename T> ErrorOr<T> readUnencodedNumber(); 346 template <typename T> ErrorOr<T> readUnsignedNumber(); 347 template <typename T> ErrorOr<T> readSignedNumber(); 348 ErrorOr<StringRef> readString(uint32_t Size); 349 350 public: 351 // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. 352 bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); 353 354 // Decode pseudo_probe section to build address to probes map. 355 bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); 356 357 // Decode pseudo_probe section to build address to probes map for specifed 358 // functions only. 359 bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, 360 std::unordered_set<uint64_t> &GuildFilter); 361 362 bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, 363 uint64_t &LastAddr, 364 std::unordered_set<uint64_t> &GuildFilter); 365 366 // Print pseudo_probe_desc section info 367 void printGUID2FuncDescMap(raw_ostream &OS); 368 369 // Print pseudo_probe section info, used along with show-disassembly 370 void printProbeForAddress(raw_ostream &OS, uint64_t Address); 371 372 // do printProbeForAddress for all addresses 373 void printProbesForAllAddresses(raw_ostream &OS); 374 375 // Look up the probe of a call for the input address 376 const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const; 377 378 const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const; 379 380 // Helper function to populate one probe's inline stack into 381 // \p InlineContextStack. 382 // Current leaf location info will be added if IncludeLeaf is true 383 // Example: 384 // Current probe(bar:3) inlined at foo:2 then inlined at main:1 385 // IncludeLeaf = true, Output: [main:1, foo:2, bar:3] 386 // IncludeLeaf = false, Output: [main:1, foo:2] 387 void getInlineContextForProbe( 388 const MCDecodedPseudoProbe *Probe, 389 SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack, 390 bool IncludeLeaf) const; 391 392 const AddressProbesMap &getAddress2ProbesMap() const { 393 return Address2ProbesMap; 394 } 395 396 AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; } 397 398 const GUIDProbeFunctionMap &getGUID2FuncDescMap() const { 399 return GUID2FuncDescMap; 400 } 401 402 const MCPseudoProbeFuncDesc * 403 getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const; 404 405 const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { 406 return DummyInlineRoot; 407 } 408 }; 409 410 } // end namespace llvm 411 412 #endif // LLVM_MC_MCPSEUDOPROBE_H 413