1 //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the MCPseudoProbe to support the pseudo
10 // probe encoding for AutoFDO. Pseudo probes together with their inline context
11 // are encoded in a DFS recursive way in the .pseudoprobe sections. For each
12 // .pseudoprobe section, the encoded binary data consist of a single or mutiple
13 // function records each for one outlined function. A function record has the
14 // following format :
15 //
16 // FUNCTION BODY (one for each outlined function present in the text section)
17 //    GUID (uint64)
18 //        GUID of the function's source name which may be different from the
19 //        actual binary linkage name. This GUID will be used to decode and
20 //        generate a profile against the source function name.
21 //    NPROBES (ULEB128)
22 //        Number of probes originating from this function.
23 //    NUM_INLINED_FUNCTIONS (ULEB128)
24 //        Number of callees inlined into this function, aka number of
25 //        first-level inlinees
26 //    PROBE RECORDS
27 //        A list of NPROBES entries. Each entry contains:
28 //          INDEX (ULEB128)
29 //          TYPE (uint4)
30 //            0 - block probe, 1 - indirect call, 2 - direct call
31 //          ATTRIBUTE (uint3)
32 //            1 - reserved
33 //          ADDRESS_TYPE (uint1)
34 //            0 - code address for regular probes (for downwards compatibility)
35 //              - GUID of linkage name for sentinel probes
36 //            1 - address delta
37 //          CODE_ADDRESS (uint64 or ULEB128)
38 //            code address or address delta, depending on ADDRESS_TYPE
39 //    INLINED FUNCTION RECORDS
40 //        A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
41 //        callees.  Each record contains:
42 //          INLINE SITE
43 //            ID of the callsite probe (ULEB128)
44 //          FUNCTION BODY
45 //            A FUNCTION BODY entry describing the inlined function.
46 //
47 // TODO: retire the ADDRESS_TYPE encoding for code addresses once compatibility
48 // is no longer an issue.
49 //===----------------------------------------------------------------------===//
50 
51 #ifndef LLVM_MC_MCPSEUDOPROBE_H
52 #define LLVM_MC_MCPSEUDOPROBE_H
53 
54 #include "llvm/ADT/DenseSet.h"
55 #include "llvm/ADT/SmallVector.h"
56 #include "llvm/ADT/StringRef.h"
57 #include "llvm/IR/PseudoProbe.h"
58 #include "llvm/Support/ErrorOr.h"
59 #include <list>
60 #include <map>
61 #include <memory>
62 #include <string>
63 #include <tuple>
64 #include <type_traits>
65 #include <unordered_map>
66 #include <unordered_set>
67 #include <vector>
68 
69 namespace llvm {
70 
71 class MCSymbol;
72 class MCObjectStreamer;
73 class raw_ostream;
74 
75 enum class MCPseudoProbeFlag {
76   // If set, indicates that the probe is encoded as an address delta
77   // instead of a real code address.
78   AddressDelta = 0x1,
79 };
80 
81 // Function descriptor decoded from .pseudo_probe_desc section
82 struct MCPseudoProbeFuncDesc {
83   uint64_t FuncGUID = 0;
84   uint64_t FuncHash = 0;
85   std::string FuncName;
86 
87   MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name)
88       : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){};
89 
90   void print(raw_ostream &OS);
91 };
92 
93 class MCDecodedPseudoProbe;
94 
95 // An inline frame has the form <CalleeGuid, ProbeID>
96 using InlineSite = std::tuple<uint64_t, uint32_t>;
97 using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
98 // GUID to PseudoProbeFuncDesc map
99 using GUIDProbeFunctionMap =
100     std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
101 // Address to pseudo probes map.
102 using AddressProbesMap =
103     std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>;
104 
105 class MCDecodedPseudoProbeInlineTree;
106 
107 class MCPseudoProbeBase {
108 protected:
109   uint64_t Guid;
110   uint64_t Index;
111   uint8_t Attributes;
112   uint8_t Type;
113   // The value should be equal to PseudoProbeReservedId::Last + 1 which is
114   // defined in SampleProfileProbe.h. The header file is not included here to
115   // reduce the dependency from MC to IPO.
116   const static uint32_t PseudoProbeFirstId = 1;
117 
118 public:
119   MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T)
120       : Guid(G), Index(I), Attributes(At), Type(T) {}
121 
122   bool isEntry() const { return Index == PseudoProbeFirstId; }
123 
124   uint64_t getGuid() const { return Guid; }
125 
126   uint64_t getIndex() const { return Index; }
127 
128   uint8_t getAttributes() const { return Attributes; }
129 
130   uint8_t getType() const { return Type; }
131 
132   bool isBlock() const {
133     return Type == static_cast<uint8_t>(PseudoProbeType::Block);
134   }
135 
136   bool isIndirectCall() const {
137     return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall);
138   }
139 
140   bool isDirectCall() const {
141     return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall);
142   }
143 
144   bool isCall() const { return isIndirectCall() || isDirectCall(); }
145 
146   void setAttributes(uint8_t Attr) { Attributes = Attr; }
147 };
148 
149 /// Instances of this class represent a pseudo probe instance for a pseudo probe
150 /// table entry, which is created during a machine instruction is assembled and
151 /// uses an address from a temporary label created at the current address in the
152 /// current section.
153 class MCPseudoProbe : public MCPseudoProbeBase {
154   MCSymbol *Label;
155 
156 public:
157   MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
158                 uint64_t Attributes)
159       : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) {
160     assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
161     assert(Attributes <= 0xFF &&
162            "Probe attributes too big to encode, exceeding 2^16");
163   }
164 
165   MCSymbol *getLabel() const { return Label; }
166   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
167 };
168 
169 // Represents a callsite with caller function name and probe id
170 using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>;
171 
172 class MCDecodedPseudoProbe : public MCPseudoProbeBase {
173   uint64_t Address;
174   MCDecodedPseudoProbeInlineTree *InlineTree;
175 
176 public:
177   MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
178                        uint8_t At, MCDecodedPseudoProbeInlineTree *Tree)
179       : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad),
180         InlineTree(Tree){};
181 
182   uint64_t getAddress() const { return Address; }
183 
184   void setAddress(uint64_t Addr) { Address = Addr; }
185 
186   MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const {
187     return InlineTree;
188   }
189 
190   // Get the inlined context by traversing current inline tree backwards,
191   // each tree node has its InlineSite which is taken as the context.
192   // \p ContextStack is populated in root to leaf order
193   void
194   getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
195                    const GUIDProbeFunctionMap &GUID2FuncMAP) const;
196 
197   // Helper function to get the string from context stack
198   std::string
199   getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const;
200 
201   // Print pseudo probe while disassembling
202   void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP,
203              bool ShowName) const;
204 };
205 
206 template <typename ProbeType, typename DerivedProbeInlineTreeType>
207 class MCPseudoProbeInlineTreeBase {
208   struct InlineSiteHash {
209     uint64_t operator()(const InlineSite &Site) const {
210       return std::get<0>(Site) ^ std::get<1>(Site);
211     }
212   };
213 
214 protected:
215   // Track children (e.g. inlinees) of current context
216   using InlinedProbeTreeMap = std::unordered_map<
217       InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
218   InlinedProbeTreeMap Children;
219   // Set of probes that come with the function.
220   std::vector<ProbeType> Probes;
221   MCPseudoProbeInlineTreeBase() {
222     static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
223                                   DerivedProbeInlineTreeType>::value,
224                   "DerivedProbeInlineTreeType must be subclass of "
225                   "MCPseudoProbeInlineTreeBase");
226   }
227 
228 public:
229   uint64_t Guid = 0;
230 
231   // Root node has a GUID 0.
232   bool isRoot() const { return Guid == 0; }
233   InlinedProbeTreeMap &getChildren() { return Children; }
234   const InlinedProbeTreeMap &getChildren() const { return Children; }
235   std::vector<ProbeType> &getProbes() { return Probes; }
236   void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
237   // Caller node of the inline site
238   MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent;
239   DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
240     auto Ret = Children.emplace(
241         Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
242     Ret.first->second->Parent = this;
243     return Ret.first->second.get();
244   };
245 };
246 
247 // A Tri-tree based data structure to group probes by inline stack.
248 // A tree is allocated for a standalone .text section. A fake
249 // instance is created as the root of a tree.
250 // A real instance of this class is created for each function, either a
251 // not inlined function that has code in .text section or an inlined function.
252 class MCPseudoProbeInlineTree
253     : public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
254                                          MCPseudoProbeInlineTree> {
255 public:
256   MCPseudoProbeInlineTree() = default;
257   MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
258   MCPseudoProbeInlineTree(const InlineSite &Site) {
259     this->Guid = std::get<0>(Site);
260   }
261 
262   // MCPseudoProbeInlineTree method based on Inlinees
263   void addPseudoProbe(const MCPseudoProbe &Probe,
264                       const MCPseudoProbeInlineStack &InlineStack);
265   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
266 };
267 
268 // inline tree node for the decoded pseudo probe
269 class MCDecodedPseudoProbeInlineTree
270     : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
271                                          MCDecodedPseudoProbeInlineTree> {
272 public:
273   InlineSite ISite;
274   // Used for decoding
275   uint32_t ChildrenToProcess = 0;
276 
277   MCDecodedPseudoProbeInlineTree() = default;
278   MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
279 
280   // Return false if it's a dummy inline site
281   bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
282 };
283 
284 /// Instances of this class represent the pseudo probes inserted into a compile
285 /// unit.
286 class MCPseudoProbeSections {
287 public:
288   void addPseudoProbe(MCSymbol *FuncSym, const MCPseudoProbe &Probe,
289                       const MCPseudoProbeInlineStack &InlineStack) {
290     MCProbeDivisions[FuncSym].addPseudoProbe(Probe, InlineStack);
291   }
292 
293   // TODO: Sort by getOrdinal to ensure a determinstic section order
294   using MCProbeDivisionMap = std::map<MCSymbol *, MCPseudoProbeInlineTree>;
295 
296 private:
297   // A collection of MCPseudoProbe for each function. The MCPseudoProbes are
298   // grouped by GUIDs due to inlining that can bring probes from different
299   // functions into one function.
300   MCProbeDivisionMap MCProbeDivisions;
301 
302 public:
303   const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; }
304 
305   bool empty() const { return MCProbeDivisions.empty(); }
306 
307   void emit(MCObjectStreamer *MCOS);
308 };
309 
310 class MCPseudoProbeTable {
311   // A collection of MCPseudoProbe in the current module grouped by
312   // functions. MCPseudoProbes will be encoded into a corresponding
313   // .pseudoprobe section. With functions emitted as separate comdats,
314   // a text section really only contains the code of a function solely, and the
315   // probes associated with the text section will be emitted into a standalone
316   // .pseudoprobe section that shares the same comdat group with the function.
317   MCPseudoProbeSections MCProbeSections;
318 
319 public:
320   static void emit(MCObjectStreamer *MCOS);
321 
322   MCPseudoProbeSections &getProbeSections() { return MCProbeSections; }
323 
324 #ifndef NDEBUG
325   static int DdgPrintIndent;
326 #endif
327 };
328 
329 class MCPseudoProbeDecoder {
330   // GUID to PseudoProbeFuncDesc map.
331   GUIDProbeFunctionMap GUID2FuncDescMap;
332 
333   // Address to probes map.
334   AddressProbesMap Address2ProbesMap;
335 
336   // The dummy root of the inline trie, all the outlined function will directly
337   // be the children of the dummy root, all the inlined function will be the
338   // children of its inlineer. So the relation would be like:
339   // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2
340   MCDecodedPseudoProbeInlineTree DummyInlineRoot;
341 
342   /// Points to the current location in the buffer.
343   const uint8_t *Data = nullptr;
344 
345   /// Points to the end of the buffer.
346   const uint8_t *End = nullptr;
347 
348   /// Whether encoding is based on a starting probe with absolute code address.
349   bool EncodingIsAddrBased = false;
350 
351   // Decoding helper function
352   template <typename T> ErrorOr<T> readUnencodedNumber();
353   template <typename T> ErrorOr<T> readUnsignedNumber();
354   template <typename T> ErrorOr<T> readSignedNumber();
355   ErrorOr<StringRef> readString(uint32_t Size);
356 
357 public:
358   using Uint64Set = DenseSet<uint64_t>;
359   using Uint64Map = DenseMap<uint64_t, uint64_t>;
360 
361   // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
362   bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);
363 
364   // Decode pseudo_probe section to build address to probes map for specifed
365   // functions only.
366   bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
367                              const Uint64Set &GuildFilter,
368                              const Uint64Map &FuncStartAddrs);
369 
370   bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
371                              uint64_t &LastAddr, const Uint64Set &GuildFilter,
372                              const Uint64Map &FuncStartAddrs);
373 
374   // Print pseudo_probe_desc section info
375   void printGUID2FuncDescMap(raw_ostream &OS);
376 
377   // Print pseudo_probe section info, used along with show-disassembly
378   void printProbeForAddress(raw_ostream &OS, uint64_t Address);
379 
380   // do printProbeForAddress for all addresses
381   void printProbesForAllAddresses(raw_ostream &OS);
382 
383   // Look up the probe of a call for the input address
384   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const;
385 
386   const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
387 
388   // Helper function to populate one probe's inline stack into
389   // \p InlineContextStack.
390   // Current leaf location info will be added if IncludeLeaf is true
391   // Example:
392   //  Current probe(bar:3) inlined at foo:2 then inlined at main:1
393   //  IncludeLeaf = true,  Output: [main:1, foo:2, bar:3]
394   //  IncludeLeaf = false, Output: [main:1, foo:2]
395   void getInlineContextForProbe(
396       const MCDecodedPseudoProbe *Probe,
397       SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
398       bool IncludeLeaf) const;
399 
400   const AddressProbesMap &getAddress2ProbesMap() const {
401     return Address2ProbesMap;
402   }
403 
404   AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; }
405 
406   const GUIDProbeFunctionMap &getGUID2FuncDescMap() const {
407     return GUID2FuncDescMap;
408   }
409 
410   const MCPseudoProbeFuncDesc *
411   getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const;
412 
413   const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
414     return DummyInlineRoot;
415   }
416 };
417 
418 } // end namespace llvm
419 
420 #endif // LLVM_MC_MCPSEUDOPROBE_H
421