1 //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the MCPseudoProbe to support the pseudo
10 // probe encoding for AutoFDO. Pseudo probes together with their inline context
11 // are encoded in a DFS recursive way in the .pseudoprobe sections. For each
12 // .pseudoprobe section, the encoded binary data consist of a single or mutiple
13 // function records each for one outlined function. A function record has the
14 // following format :
15 //
16 // FUNCTION BODY (one for each outlined function present in the text section)
17 //    GUID (uint64)
18 //        GUID of the function
19 //    NPROBES (ULEB128)
20 //        Number of probes originating from this function.
21 //    NUM_INLINED_FUNCTIONS (ULEB128)
22 //        Number of callees inlined into this function, aka number of
23 //        first-level inlinees
24 //    PROBE RECORDS
25 //        A list of NPROBES entries. Each entry contains:
26 //          INDEX (ULEB128)
27 //          TYPE (uint4)
28 //            0 - block probe, 1 - indirect call, 2 - direct call
29 //          ATTRIBUTE (uint3)
30 //            1 - reserved
31 //          ADDRESS_TYPE (uint1)
32 //            0 - code address, 1 - address delta
33 //          CODE_ADDRESS (uint64 or ULEB128)
34 //            code address or address delta, depending on ADDRESS_TYPE
35 //    INLINED FUNCTION RECORDS
36 //        A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
37 //        callees.  Each record contains:
38 //          INLINE SITE
39 //            ID of the callsite probe (ULEB128)
40 //          FUNCTION BODY
41 //            A FUNCTION BODY entry describing the inlined function.
42 //===----------------------------------------------------------------------===//
43 
44 #ifndef LLVM_MC_MCPSEUDOPROBE_H
45 #define LLVM_MC_MCPSEUDOPROBE_H
46 
47 #include "llvm/ADT/SmallVector.h"
48 #include "llvm/ADT/StringRef.h"
49 #include "llvm/IR/PseudoProbe.h"
50 #include "llvm/Support/ErrorOr.h"
51 #include <list>
52 #include <map>
53 #include <memory>
54 #include <string>
55 #include <tuple>
56 #include <type_traits>
57 #include <unordered_map>
58 #include <vector>
59 
60 namespace llvm {
61 
62 class MCSection;
63 class MCStreamer;
64 class MCSymbol;
65 class MCObjectStreamer;
66 class raw_ostream;
67 
68 enum class MCPseudoProbeFlag {
69   // If set, indicates that the probe is encoded as an address delta
70   // instead of a real code address.
71   AddressDelta = 0x1,
72 };
73 
74 // Function descriptor decoded from .pseudo_probe_desc section
75 struct MCPseudoProbeFuncDesc {
76   uint64_t FuncGUID = 0;
77   uint64_t FuncHash = 0;
78   std::string FuncName;
79 
MCPseudoProbeFuncDescMCPseudoProbeFuncDesc80   MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name)
81       : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){};
82 
83   void print(raw_ostream &OS);
84 };
85 
86 class MCPseudoProbe;
87 class MCDecodedPseudoProbe;
88 
89 // An inline frame has the form <Guid, ProbeID>
90 using InlineSite = std::tuple<uint64_t, uint32_t>;
91 using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
92 // GUID to PseudoProbeFuncDesc map
93 using GUIDProbeFunctionMap =
94     std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
95 // Address to pseudo probes map.
96 using AddressProbesMap =
97     std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>;
98 
99 class MCPseudoProbeInlineTree;
100 class MCDecodedPseudoProbeInlineTree;
101 
102 class MCPseudoProbeBase {
103 protected:
104   uint64_t Guid;
105   uint64_t Index;
106   uint8_t Attributes;
107   uint8_t Type;
108   // The value should be equal to PseudoProbeReservedId::Last + 1 which is
109   // defined in SampleProfileProbe.h. The header file is not included here to
110   // reduce the dependency from MC to IPO.
111   const static uint32_t PseudoProbeFirstId = 1;
112 
113 public:
MCPseudoProbeBase(uint64_t G,uint64_t I,uint64_t At,uint8_t T)114   MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T)
115       : Guid(G), Index(I), Attributes(At), Type(T) {}
116 
isEntry()117   bool isEntry() const { return Index == PseudoProbeFirstId; }
118 
getGuid()119   uint64_t getGuid() const { return Guid; }
120 
getIndex()121   uint64_t getIndex() const { return Index; }
122 
getAttributes()123   uint8_t getAttributes() const { return Attributes; }
124 
getType()125   uint8_t getType() const { return Type; }
126 
isBlock()127   bool isBlock() const {
128     return Type == static_cast<uint8_t>(PseudoProbeType::Block);
129   }
130 
isIndirectCall()131   bool isIndirectCall() const {
132     return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall);
133   }
134 
isDirectCall()135   bool isDirectCall() const {
136     return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall);
137   }
138 
isCall()139   bool isCall() const { return isIndirectCall() || isDirectCall(); }
140 
setAttributes(uint8_t Attr)141   void setAttributes(uint8_t Attr) { Attributes = Attr; }
142 };
143 
144 /// Instances of this class represent a pseudo probe instance for a pseudo probe
145 /// table entry, which is created during a machine instruction is assembled and
146 /// uses an address from a temporary label created at the current address in the
147 /// current section.
148 class MCPseudoProbe : public MCPseudoProbeBase {
149   MCSymbol *Label;
150 
151 public:
MCPseudoProbe(MCSymbol * Label,uint64_t Guid,uint64_t Index,uint64_t Type,uint64_t Attributes)152   MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
153                 uint64_t Attributes)
154       : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) {
155     assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
156     assert(Attributes <= 0xFF &&
157            "Probe attributes too big to encode, exceeding 2^16");
158   }
159 
getLabel()160   MCSymbol *getLabel() const { return Label; }
161   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
162 };
163 
164 // Represents a callsite with caller function name and probe id
165 using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>;
166 
167 class MCDecodedPseudoProbe : public MCPseudoProbeBase {
168   uint64_t Address;
169   MCDecodedPseudoProbeInlineTree *InlineTree;
170 
171 public:
MCDecodedPseudoProbe(uint64_t Ad,uint64_t G,uint32_t I,PseudoProbeType K,uint8_t At,MCDecodedPseudoProbeInlineTree * Tree)172   MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
173                        uint8_t At, MCDecodedPseudoProbeInlineTree *Tree)
174       : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad),
175         InlineTree(Tree){};
176 
getAddress()177   uint64_t getAddress() const { return Address; }
178 
setAddress(uint64_t Addr)179   void setAddress(uint64_t Addr) { Address = Addr; }
180 
getInlineTreeNode()181   MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const {
182     return InlineTree;
183   }
184 
185   // Get the inlined context by traversing current inline tree backwards,
186   // each tree node has its InlineSite which is taken as the context.
187   // \p ContextStack is populated in root to leaf order
188   void
189   getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
190                    const GUIDProbeFunctionMap &GUID2FuncMAP) const;
191 
192   // Helper function to get the string from context stack
193   std::string
194   getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const;
195 
196   // Print pseudo probe while disassembling
197   void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP,
198              bool ShowName) const;
199 };
200 
201 template <typename ProbeType, typename DerivedProbeInlineTreeType>
202 class MCPseudoProbeInlineTreeBase {
203   struct InlineSiteHash {
operatorInlineSiteHash204     uint64_t operator()(const InlineSite &Site) const {
205       return std::get<0>(Site) ^ std::get<1>(Site);
206     }
207   };
208 
209 protected:
210   // Track children (e.g. inlinees) of current context
211   using InlinedProbeTreeMap = std::unordered_map<
212       InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
213   InlinedProbeTreeMap Children;
214   // Set of probes that come with the function.
215   std::vector<ProbeType> Probes;
MCPseudoProbeInlineTreeBase()216   MCPseudoProbeInlineTreeBase() {
217     static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
218                                   DerivedProbeInlineTreeType>::value,
219                   "DerivedProbeInlineTreeType must be subclass of "
220                   "MCPseudoProbeInlineTreeBase");
221   }
222 
223 public:
224   uint64_t Guid = 0;
225 
226   // Root node has a GUID 0.
isRoot()227   bool isRoot() const { return Guid == 0; }
getChildren()228   InlinedProbeTreeMap &getChildren() { return Children; }
getChildren()229   const InlinedProbeTreeMap &getChildren() const { return Children; }
getProbes()230   std::vector<ProbeType> &getProbes() { return Probes; }
addProbes(ProbeType Probe)231   void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
232   // Caller node of the inline site
233   MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent;
getOrAddNode(const InlineSite & Site)234   DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
235     auto Ret = Children.emplace(
236         Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
237     Ret.first->second->Parent = this;
238     return Ret.first->second.get();
239   };
240 };
241 
242 // A Tri-tree based data structure to group probes by inline stack.
243 // A tree is allocated for a standalone .text section. A fake
244 // instance is created as the root of a tree.
245 // A real instance of this class is created for each function, either a
246 // not inlined function that has code in .text section or an inlined function.
247 class MCPseudoProbeInlineTree
248     : public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
249                                          MCPseudoProbeInlineTree> {
250 public:
251   MCPseudoProbeInlineTree() = default;
MCPseudoProbeInlineTree(uint64_t Guid)252   MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
MCPseudoProbeInlineTree(const InlineSite & Site)253   MCPseudoProbeInlineTree(const InlineSite &Site) {
254     this->Guid = std::get<0>(Site);
255   }
256 
257   // MCPseudoProbeInlineTree method based on Inlinees
258   void addPseudoProbe(const MCPseudoProbe &Probe,
259                       const MCPseudoProbeInlineStack &InlineStack);
260   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
261 };
262 
263 // inline tree node for the decoded pseudo probe
264 class MCDecodedPseudoProbeInlineTree
265     : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
266                                          MCDecodedPseudoProbeInlineTree> {
267 public:
268   InlineSite ISite;
269   // Used for decoding
270   uint32_t ChildrenToProcess = 0;
271 
MCDecodedPseudoProbeInlineTree()272   MCDecodedPseudoProbeInlineTree(){};
MCDecodedPseudoProbeInlineTree(const InlineSite & Site)273   MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
274 
275   // Return false if it's a dummy inline site
hasInlineSite()276   bool hasInlineSite() const { return std::get<0>(ISite) != 0; }
277 };
278 
279 /// Instances of this class represent the pseudo probes inserted into a compile
280 /// unit.
281 class MCPseudoProbeSection {
282 public:
addPseudoProbe(MCSection * Sec,const MCPseudoProbe & Probe,const MCPseudoProbeInlineStack & InlineStack)283   void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe,
284                       const MCPseudoProbeInlineStack &InlineStack) {
285     MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack);
286   }
287 
288   // TODO: Sort by getOrdinal to ensure a determinstic section order
289   using MCProbeDivisionMap = std::map<MCSection *, MCPseudoProbeInlineTree>;
290 
291 private:
292   // A collection of MCPseudoProbe for each text section. The MCPseudoProbes
293   // are grouped by GUID of the functions where they are from and will be
294   // encoded by groups. In the comdat scenario where a text section really only
295   // contains the code of a function solely, the probes associated with a comdat
296   // function are still grouped by GUIDs due to inlining that can bring probes
297   // from different functions into one function.
298   MCProbeDivisionMap MCProbeDivisions;
299 
300 public:
getMCProbes()301   const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; }
302 
empty()303   bool empty() const { return MCProbeDivisions.empty(); }
304 
305   void emit(MCObjectStreamer *MCOS);
306 };
307 
308 class MCPseudoProbeTable {
309   // A collection of MCPseudoProbe in the current module grouped by text
310   // sections. MCPseudoProbes will be encoded into a corresponding
311   // .pseudoprobe section. With functions emitted as separate comdats,
312   // a text section really only contains the code of a function solely, and the
313   // probes associated with the text section will be emitted into a standalone
314   // .pseudoprobe section that shares the same comdat group with the function.
315   MCPseudoProbeSection MCProbeSections;
316 
317 public:
318   static void emit(MCObjectStreamer *MCOS);
319 
getProbeSections()320   MCPseudoProbeSection &getProbeSections() { return MCProbeSections; }
321 
322 #ifndef NDEBUG
323   static int DdgPrintIndent;
324 #endif
325 };
326 
327 class MCPseudoProbeDecoder {
328   // GUID to PseudoProbeFuncDesc map.
329   GUIDProbeFunctionMap GUID2FuncDescMap;
330 
331   // Address to probes map.
332   AddressProbesMap Address2ProbesMap;
333 
334   // The dummy root of the inline trie, all the outlined function will directly
335   // be the children of the dummy root, all the inlined function will be the
336   // children of its inlineer. So the relation would be like:
337   // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2
338   MCDecodedPseudoProbeInlineTree DummyInlineRoot;
339 
340   /// Points to the current location in the buffer.
341   const uint8_t *Data = nullptr;
342 
343   /// Points to the end of the buffer.
344   const uint8_t *End = nullptr;
345 
346   // Decoding helper function
347   template <typename T> ErrorOr<T> readUnencodedNumber();
348   template <typename T> ErrorOr<T> readUnsignedNumber();
349   template <typename T> ErrorOr<T> readSignedNumber();
350   ErrorOr<StringRef> readString(uint32_t Size);
351 
352 public:
353   // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
354   bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);
355 
356   // Decode pseudo_probe section to build address to probes map.
357   bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size);
358 
359   // Print pseudo_probe_desc section info
360   void printGUID2FuncDescMap(raw_ostream &OS);
361 
362   // Print pseudo_probe section info, used along with show-disassembly
363   void printProbeForAddress(raw_ostream &OS, uint64_t Address);
364 
365   // do printProbeForAddress for all addresses
366   void printProbesForAllAddresses(raw_ostream &OS);
367 
368   // Look up the probe of a call for the input address
369   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const;
370 
371   const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
372 
373   // Helper function to populate one probe's inline stack into
374   // \p InlineContextStack.
375   // Current leaf location info will be added if IncludeLeaf is true
376   // Example:
377   //  Current probe(bar:3) inlined at foo:2 then inlined at main:1
378   //  IncludeLeaf = true,  Output: [main:1, foo:2, bar:3]
379   //  IncludeLeaf = false, Output: [main:1, foo:2]
380   void getInlineContextForProbe(
381       const MCDecodedPseudoProbe *Probe,
382       SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
383       bool IncludeLeaf) const;
384 
getAddress2ProbesMap()385   const AddressProbesMap &getAddress2ProbesMap() const {
386     return Address2ProbesMap;
387   }
388 
getAddress2ProbesMap()389   AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; }
390 
getGUID2FuncDescMap()391   const GUIDProbeFunctionMap &getGUID2FuncDescMap() const {
392     return GUID2FuncDescMap;
393   }
394 
395   const MCPseudoProbeFuncDesc *
396   getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const;
397 
getDummyInlineRoot()398   const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
399     return DummyInlineRoot;
400   }
401 };
402 
403 } // end namespace llvm
404 
405 #endif // LLVM_MC_MCPSEUDOPROBE_H
406