1 //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the MCPseudoProbe to support the pseudo
10 // probe encoding for AutoFDO. Pseudo probes together with their inline context
11 // are encoded in a DFS recursive way in the .pseudoprobe sections. For each
12 // .pseudoprobe section, the encoded binary data consist of a single or mutiple
13 // function records each for one outlined function. A function record has the
14 // following format :
15 //
16 // FUNCTION BODY (one for each outlined function present in the text section)
17 //    GUID (uint64)
18 //        GUID of the function's source name which may be different from the
19 //        actual binary linkage name. This GUID will be used to decode and
20 //        generate a profile against the source function name.
21 //    NPROBES (ULEB128)
22 //        Number of probes originating from this function.
23 //    NUM_INLINED_FUNCTIONS (ULEB128)
24 //        Number of callees inlined into this function, aka number of
25 //        first-level inlinees
26 //    PROBE RECORDS
27 //        A list of NPROBES entries. Each entry contains:
28 //          INDEX (ULEB128)
29 //          TYPE (uint4)
30 //            0 - block probe, 1 - indirect call, 2 - direct call
31 //          ATTRIBUTE (uint3)
32 //            1 - reserved
33 //            2 - Sentinel
34 //            4 - HasDiscriminator
35 //          ADDRESS_TYPE (uint1)
36 //            0 - code address for regular probes (for downwards compatibility)
37 //              - GUID of linkage name for sentinel probes
38 //            1 - address delta
39 //          CODE_ADDRESS (uint64 or ULEB128)
40 //            code address or address delta, depending on ADDRESS_TYPE
41 //          DISCRIMINATOR (ULEB128) if HasDiscriminator
42 //    INLINED FUNCTION RECORDS
43 //        A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
44 //        callees.  Each record contains:
45 //          INLINE SITE
46 //            ID of the callsite probe (ULEB128)
47 //          FUNCTION BODY
48 //            A FUNCTION BODY entry describing the inlined function.
49 //
50 // TODO: retire the ADDRESS_TYPE encoding for code addresses once compatibility
51 // is no longer an issue.
52 //===----------------------------------------------------------------------===//
53 
54 #ifndef LLVM_MC_MCPSEUDOPROBE_H
55 #define LLVM_MC_MCPSEUDOPROBE_H
56 
57 #include "llvm/ADT/DenseMap.h"
58 #include "llvm/ADT/DenseSet.h"
59 #include "llvm/ADT/SmallVector.h"
60 #include "llvm/ADT/StringRef.h"
61 #include "llvm/IR/PseudoProbe.h"
62 #include "llvm/Support/ErrorOr.h"
63 #include <list>
64 #include <memory>
65 #include <string>
66 #include <tuple>
67 #include <type_traits>
68 #include <unordered_map>
69 #include <unordered_set>
70 #include <vector>
71 
72 namespace llvm {
73 
74 class MCSymbol;
75 class MCObjectStreamer;
76 class raw_ostream;
77 
78 enum class MCPseudoProbeFlag {
79   // If set, indicates that the probe is encoded as an address delta
80   // instead of a real code address.
81   AddressDelta = 0x1,
82 };
83 
84 // Function descriptor decoded from .pseudo_probe_desc section
85 struct MCPseudoProbeFuncDesc {
86   uint64_t FuncGUID = 0;
87   uint64_t FuncHash = 0;
88   std::string FuncName;
89 
90   MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name)
91       : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){};
92 
93   void print(raw_ostream &OS);
94 };
95 
96 class MCDecodedPseudoProbe;
97 
98 // An inline frame has the form <CalleeGuid, ProbeID>
99 using InlineSite = std::tuple<uint64_t, uint32_t>;
100 using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
101 // GUID to PseudoProbeFuncDesc map
102 using GUIDProbeFunctionMap =
103     std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
104 // Address to pseudo probes map.
105 using AddressProbesMap =
106     std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>;
107 
108 class MCDecodedPseudoProbeInlineTree;
109 
110 class MCPseudoProbeBase {
111 protected:
112   uint64_t Guid;
113   uint64_t Index;
114   uint32_t Discriminator;
115   uint8_t Attributes;
116   uint8_t Type;
117   // The value should be equal to PseudoProbeReservedId::Last + 1 which is
118   // defined in SampleProfileProbe.h. The header file is not included here to
119   // reduce the dependency from MC to IPO.
120   const static uint32_t PseudoProbeFirstId = 1;
121 
122 public:
123   MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D)
124       : Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {}
125 
126   bool isEntry() const { return Index == PseudoProbeFirstId; }
127 
128   uint64_t getGuid() const { return Guid; }
129 
130   uint64_t getIndex() const { return Index; }
131 
132   uint32_t getDiscriminator() const { return Discriminator; }
133 
134   uint8_t getAttributes() const { return Attributes; }
135 
136   uint8_t getType() const { return Type; }
137 
138   bool isBlock() const {
139     return Type == static_cast<uint8_t>(PseudoProbeType::Block);
140   }
141 
142   bool isIndirectCall() const {
143     return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall);
144   }
145 
146   bool isDirectCall() const {
147     return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall);
148   }
149 
150   bool isCall() const { return isIndirectCall() || isDirectCall(); }
151 
152   void setAttributes(uint8_t Attr) { Attributes = Attr; }
153 };
154 
155 /// Instances of this class represent a pseudo probe instance for a pseudo probe
156 /// table entry, which is created during a machine instruction is assembled and
157 /// uses an address from a temporary label created at the current address in the
158 /// current section.
159 class MCPseudoProbe : public MCPseudoProbeBase {
160   MCSymbol *Label;
161 
162 public:
163   MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
164                 uint64_t Attributes, uint32_t Discriminator)
165       : MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator),
166         Label(Label) {
167     assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
168     assert(Attributes <= 0xFF &&
169            "Probe attributes too big to encode, exceeding 2^16");
170   }
171 
172   MCSymbol *getLabel() const { return Label; }
173   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
174 };
175 
176 // Represents a callsite with caller function name and probe id
177 using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>;
178 
179 class MCDecodedPseudoProbe : public MCPseudoProbeBase {
180   uint64_t Address;
181   MCDecodedPseudoProbeInlineTree *InlineTree;
182 
183 public:
184   MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
185                        uint8_t At, uint32_t D,
186                        MCDecodedPseudoProbeInlineTree *Tree)
187       : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K), D), Address(Ad),
188         InlineTree(Tree){};
189 
190   uint64_t getAddress() const { return Address; }
191 
192   void setAddress(uint64_t Addr) { Address = Addr; }
193 
194   MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const {
195     return InlineTree;
196   }
197 
198   // Get the inlined context by traversing current inline tree backwards,
199   // each tree node has its InlineSite which is taken as the context.
200   // \p ContextStack is populated in root to leaf order
201   void
202   getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
203                    const GUIDProbeFunctionMap &GUID2FuncMAP) const;
204 
205   // Helper function to get the string from context stack
206   std::string
207   getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const;
208 
209   // Print pseudo probe while disassembling
210   void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP,
211              bool ShowName) const;
212 };
213 
214 template <typename ProbeType, typename DerivedProbeInlineTreeType>
215 class MCPseudoProbeInlineTreeBase {
216   struct InlineSiteHash {
217     uint64_t operator()(const InlineSite &Site) const {
218       return std::get<0>(Site) ^ std::get<1>(Site);
219     }
220   };
221 
222 protected:
223   // Track children (e.g. inlinees) of current context
224   using InlinedProbeTreeMap = std::unordered_map<
225       InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
226   InlinedProbeTreeMap Children;
227   // Set of probes that come with the function.
228   std::vector<ProbeType> Probes;
229   MCPseudoProbeInlineTreeBase() {
230     static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
231                                   DerivedProbeInlineTreeType>::value,
232                   "DerivedProbeInlineTreeType must be subclass of "
233                   "MCPseudoProbeInlineTreeBase");
234   }
235 
236 public:
237   uint64_t Guid = 0;
238 
239   // Root node has a GUID 0.
240   bool isRoot() const { return Guid == 0; }
241   InlinedProbeTreeMap &getChildren() { return Children; }
242   const InlinedProbeTreeMap &getChildren() const { return Children; }
243   std::vector<ProbeType> &getProbes() { return Probes; }
244   void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
245   // Caller node of the inline site
246   MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent =
247       nullptr;
248   DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
249     auto Ret = Children.emplace(
250         Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
251     Ret.first->second->Parent = this;
252     return Ret.first->second.get();
253   };
254 };
255 
256 // A Tri-tree based data structure to group probes by inline stack.
257 // A tree is allocated for a standalone .text section. A fake
258 // instance is created as the root of a tree.
259 // A real instance of this class is created for each function, either a
260 // not inlined function that has code in .text section or an inlined function.
261 class MCPseudoProbeInlineTree
262     : public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
263                                          MCPseudoProbeInlineTree> {
264 public:
265   MCPseudoProbeInlineTree() = default;
266   MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
267   MCPseudoProbeInlineTree(const InlineSite &Site) {
268     this->Guid = std::get<0>(Site);
269   }
270 
271   // MCPseudoProbeInlineTree method based on Inlinees
272   void addPseudoProbe(const MCPseudoProbe &Probe,
273                       const MCPseudoProbeInlineStack &InlineStack);
274   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
275 };
276 
277 // inline tree node for the decoded pseudo probe
278 class MCDecodedPseudoProbeInlineTree
279     : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
280                                          MCDecodedPseudoProbeInlineTree> {
281 public:
282   InlineSite ISite;
283   // Used for decoding
284   uint32_t ChildrenToProcess = 0;
285 
286   MCDecodedPseudoProbeInlineTree() = default;
287   MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
288 
289   // Return false if it's a dummy inline site
290   bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
291 };
292 
293 /// Instances of this class represent the pseudo probes inserted into a compile
294 /// unit.
295 class MCPseudoProbeSections {
296 public:
297   void addPseudoProbe(MCSymbol *FuncSym, const MCPseudoProbe &Probe,
298                       const MCPseudoProbeInlineStack &InlineStack) {
299     MCProbeDivisions[FuncSym].addPseudoProbe(Probe, InlineStack);
300   }
301 
302   // The addresses of MCPseudoProbeInlineTree are used by the tree structure and
303   // need to be stable.
304   using MCProbeDivisionMap = std::unordered_map<MCSymbol *, MCPseudoProbeInlineTree>;
305 
306 private:
307   // A collection of MCPseudoProbe for each function. The MCPseudoProbes are
308   // grouped by GUIDs due to inlining that can bring probes from different
309   // functions into one function.
310   MCProbeDivisionMap MCProbeDivisions;
311 
312 public:
313   const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; }
314 
315   bool empty() const { return MCProbeDivisions.empty(); }
316 
317   void emit(MCObjectStreamer *MCOS);
318 };
319 
320 class MCPseudoProbeTable {
321   // A collection of MCPseudoProbe in the current module grouped by
322   // functions. MCPseudoProbes will be encoded into a corresponding
323   // .pseudoprobe section. With functions emitted as separate comdats,
324   // a text section really only contains the code of a function solely, and the
325   // probes associated with the text section will be emitted into a standalone
326   // .pseudoprobe section that shares the same comdat group with the function.
327   MCPseudoProbeSections MCProbeSections;
328 
329 public:
330   static void emit(MCObjectStreamer *MCOS);
331 
332   MCPseudoProbeSections &getProbeSections() { return MCProbeSections; }
333 
334 #ifndef NDEBUG
335   static int DdgPrintIndent;
336 #endif
337 };
338 
339 class MCPseudoProbeDecoder {
340   // GUID to PseudoProbeFuncDesc map.
341   GUIDProbeFunctionMap GUID2FuncDescMap;
342 
343   // Address to probes map.
344   AddressProbesMap Address2ProbesMap;
345 
346   // The dummy root of the inline trie, all the outlined function will directly
347   // be the children of the dummy root, all the inlined function will be the
348   // children of its inlineer. So the relation would be like:
349   // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2
350   MCDecodedPseudoProbeInlineTree DummyInlineRoot;
351 
352   /// Points to the current location in the buffer.
353   const uint8_t *Data = nullptr;
354 
355   /// Points to the end of the buffer.
356   const uint8_t *End = nullptr;
357 
358   /// Whether encoding is based on a starting probe with absolute code address.
359   bool EncodingIsAddrBased = false;
360 
361   // Decoding helper function
362   template <typename T> ErrorOr<T> readUnencodedNumber();
363   template <typename T> ErrorOr<T> readUnsignedNumber();
364   template <typename T> ErrorOr<T> readSignedNumber();
365   ErrorOr<StringRef> readString(uint32_t Size);
366 
367 public:
368   using Uint64Set = DenseSet<uint64_t>;
369   using Uint64Map = DenseMap<uint64_t, uint64_t>;
370 
371   // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
372   bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);
373 
374   // Decode pseudo_probe section to build address to probes map for specifed
375   // functions only.
376   bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
377                              const Uint64Set &GuildFilter,
378                              const Uint64Map &FuncStartAddrs);
379 
380   bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
381                              uint64_t &LastAddr, const Uint64Set &GuildFilter,
382                              const Uint64Map &FuncStartAddrs);
383 
384   // Print pseudo_probe_desc section info
385   void printGUID2FuncDescMap(raw_ostream &OS);
386 
387   // Print pseudo_probe section info, used along with show-disassembly
388   void printProbeForAddress(raw_ostream &OS, uint64_t Address);
389 
390   // do printProbeForAddress for all addresses
391   void printProbesForAllAddresses(raw_ostream &OS);
392 
393   // Look up the probe of a call for the input address
394   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const;
395 
396   const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
397 
398   // Helper function to populate one probe's inline stack into
399   // \p InlineContextStack.
400   // Current leaf location info will be added if IncludeLeaf is true
401   // Example:
402   //  Current probe(bar:3) inlined at foo:2 then inlined at main:1
403   //  IncludeLeaf = true,  Output: [main:1, foo:2, bar:3]
404   //  IncludeLeaf = false, Output: [main:1, foo:2]
405   void getInlineContextForProbe(
406       const MCDecodedPseudoProbe *Probe,
407       SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
408       bool IncludeLeaf) const;
409 
410   const AddressProbesMap &getAddress2ProbesMap() const {
411     return Address2ProbesMap;
412   }
413 
414   AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; }
415 
416   const GUIDProbeFunctionMap &getGUID2FuncDescMap() const {
417     return GUID2FuncDescMap;
418   }
419 
420   const MCPseudoProbeFuncDesc *
421   getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const;
422 
423   const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
424     return DummyInlineRoot;
425   }
426 };
427 
428 } // end namespace llvm
429 
430 #endif // LLVM_MC_MCPSEUDOPROBE_H
431