1 //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the MCPseudoProbe to support the pseudo
10 // probe encoding for AutoFDO. Pseudo probes together with their inline context
11 // are encoded in a DFS recursive way in the .pseudoprobe sections. For each
12 // .pseudoprobe section, the encoded binary data consist of a single or mutiple
13 // function records each for one outlined function. A function record has the
14 // following format :
15 //
16 // FUNCTION BODY (one for each outlined function present in the text section)
17 //    GUID (uint64)
18 //        GUID of the function
19 //    NPROBES (ULEB128)
20 //        Number of probes originating from this function.
21 //    NUM_INLINED_FUNCTIONS (ULEB128)
22 //        Number of callees inlined into this function, aka number of
23 //        first-level inlinees
24 //    PROBE RECORDS
25 //        A list of NPROBES entries. Each entry contains:
26 //          INDEX (ULEB128)
27 //          TYPE (uint4)
28 //            0 - block probe, 1 - indirect call, 2 - direct call
29 //          ATTRIBUTE (uint3)
30 //            1 - reserved
31 //          ADDRESS_TYPE (uint1)
32 //            0 - code address, 1 - address delta
33 //          CODE_ADDRESS (uint64 or ULEB128)
34 //            code address or address delta, depending on ADDRESS_TYPE
35 //    INLINED FUNCTION RECORDS
36 //        A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
37 //        callees.  Each record contains:
38 //          INLINE SITE
39 //            ID of the callsite probe (ULEB128)
40 //          FUNCTION BODY
41 //            A FUNCTION BODY entry describing the inlined function.
42 //===----------------------------------------------------------------------===//
43 
44 #ifndef LLVM_MC_MCPSEUDOPROBE_H
45 #define LLVM_MC_MCPSEUDOPROBE_H
46 
47 #include "llvm/ADT/SmallVector.h"
48 #include "llvm/ADT/StringRef.h"
49 #include "llvm/IR/PseudoProbe.h"
50 #include "llvm/Support/ErrorOr.h"
51 #include <list>
52 #include <map>
53 #include <memory>
54 #include <string>
55 #include <tuple>
56 #include <type_traits>
57 #include <unordered_map>
58 #include <unordered_set>
59 #include <vector>
60 
61 namespace llvm {
62 
63 class MCSection;
64 class MCSymbol;
65 class MCObjectStreamer;
66 class raw_ostream;
67 
68 enum class MCPseudoProbeFlag {
69   // If set, indicates that the probe is encoded as an address delta
70   // instead of a real code address.
71   AddressDelta = 0x1,
72 };
73 
74 // Function descriptor decoded from .pseudo_probe_desc section
75 struct MCPseudoProbeFuncDesc {
76   uint64_t FuncGUID = 0;
77   uint64_t FuncHash = 0;
78   std::string FuncName;
79 
80   MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name)
81       : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){};
82 
83   void print(raw_ostream &OS);
84 };
85 
86 class MCDecodedPseudoProbe;
87 
88 // An inline frame has the form <CalleeGuid, ProbeID>
89 using InlineSite = std::tuple<uint64_t, uint32_t>;
90 using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
91 // GUID to PseudoProbeFuncDesc map
92 using GUIDProbeFunctionMap =
93     std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
94 // Address to pseudo probes map.
95 using AddressProbesMap =
96     std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>;
97 
98 class MCDecodedPseudoProbeInlineTree;
99 
100 class MCPseudoProbeBase {
101 protected:
102   uint64_t Guid;
103   uint64_t Index;
104   uint8_t Attributes;
105   uint8_t Type;
106   // The value should be equal to PseudoProbeReservedId::Last + 1 which is
107   // defined in SampleProfileProbe.h. The header file is not included here to
108   // reduce the dependency from MC to IPO.
109   const static uint32_t PseudoProbeFirstId = 1;
110 
111 public:
112   MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T)
113       : Guid(G), Index(I), Attributes(At), Type(T) {}
114 
115   bool isEntry() const { return Index == PseudoProbeFirstId; }
116 
117   uint64_t getGuid() const { return Guid; }
118 
119   uint64_t getIndex() const { return Index; }
120 
121   uint8_t getAttributes() const { return Attributes; }
122 
123   uint8_t getType() const { return Type; }
124 
125   bool isBlock() const {
126     return Type == static_cast<uint8_t>(PseudoProbeType::Block);
127   }
128 
129   bool isIndirectCall() const {
130     return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall);
131   }
132 
133   bool isDirectCall() const {
134     return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall);
135   }
136 
137   bool isCall() const { return isIndirectCall() || isDirectCall(); }
138 
139   void setAttributes(uint8_t Attr) { Attributes = Attr; }
140 };
141 
142 /// Instances of this class represent a pseudo probe instance for a pseudo probe
143 /// table entry, which is created during a machine instruction is assembled and
144 /// uses an address from a temporary label created at the current address in the
145 /// current section.
146 class MCPseudoProbe : public MCPseudoProbeBase {
147   MCSymbol *Label;
148 
149 public:
150   MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
151                 uint64_t Attributes)
152       : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) {
153     assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
154     assert(Attributes <= 0xFF &&
155            "Probe attributes too big to encode, exceeding 2^16");
156   }
157 
158   MCSymbol *getLabel() const { return Label; }
159   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
160 };
161 
162 // Represents a callsite with caller function name and probe id
163 using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>;
164 
165 class MCDecodedPseudoProbe : public MCPseudoProbeBase {
166   uint64_t Address;
167   MCDecodedPseudoProbeInlineTree *InlineTree;
168 
169 public:
170   MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
171                        uint8_t At, MCDecodedPseudoProbeInlineTree *Tree)
172       : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad),
173         InlineTree(Tree){};
174 
175   uint64_t getAddress() const { return Address; }
176 
177   void setAddress(uint64_t Addr) { Address = Addr; }
178 
179   MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const {
180     return InlineTree;
181   }
182 
183   // Get the inlined context by traversing current inline tree backwards,
184   // each tree node has its InlineSite which is taken as the context.
185   // \p ContextStack is populated in root to leaf order
186   void
187   getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
188                    const GUIDProbeFunctionMap &GUID2FuncMAP) const;
189 
190   // Helper function to get the string from context stack
191   std::string
192   getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const;
193 
194   // Print pseudo probe while disassembling
195   void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP,
196              bool ShowName) const;
197 };
198 
199 template <typename ProbeType, typename DerivedProbeInlineTreeType>
200 class MCPseudoProbeInlineTreeBase {
201   struct InlineSiteHash {
202     uint64_t operator()(const InlineSite &Site) const {
203       return std::get<0>(Site) ^ std::get<1>(Site);
204     }
205   };
206 
207 protected:
208   // Track children (e.g. inlinees) of current context
209   using InlinedProbeTreeMap = std::unordered_map<
210       InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
211   InlinedProbeTreeMap Children;
212   // Set of probes that come with the function.
213   std::vector<ProbeType> Probes;
214   MCPseudoProbeInlineTreeBase() {
215     static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
216                                   DerivedProbeInlineTreeType>::value,
217                   "DerivedProbeInlineTreeType must be subclass of "
218                   "MCPseudoProbeInlineTreeBase");
219   }
220 
221 public:
222   uint64_t Guid = 0;
223 
224   // Root node has a GUID 0.
225   bool isRoot() const { return Guid == 0; }
226   InlinedProbeTreeMap &getChildren() { return Children; }
227   const InlinedProbeTreeMap &getChildren() const { return Children; }
228   std::vector<ProbeType> &getProbes() { return Probes; }
229   void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
230   // Caller node of the inline site
231   MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent;
232   DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
233     auto Ret = Children.emplace(
234         Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
235     Ret.first->second->Parent = this;
236     return Ret.first->second.get();
237   };
238 };
239 
240 // A Tri-tree based data structure to group probes by inline stack.
241 // A tree is allocated for a standalone .text section. A fake
242 // instance is created as the root of a tree.
243 // A real instance of this class is created for each function, either a
244 // not inlined function that has code in .text section or an inlined function.
245 class MCPseudoProbeInlineTree
246     : public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
247                                          MCPseudoProbeInlineTree> {
248 public:
249   MCPseudoProbeInlineTree() = default;
250   MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
251   MCPseudoProbeInlineTree(const InlineSite &Site) {
252     this->Guid = std::get<0>(Site);
253   }
254 
255   // MCPseudoProbeInlineTree method based on Inlinees
256   void addPseudoProbe(const MCPseudoProbe &Probe,
257                       const MCPseudoProbeInlineStack &InlineStack);
258   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
259 };
260 
261 // inline tree node for the decoded pseudo probe
262 class MCDecodedPseudoProbeInlineTree
263     : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
264                                          MCDecodedPseudoProbeInlineTree> {
265 public:
266   InlineSite ISite;
267   // Used for decoding
268   uint32_t ChildrenToProcess = 0;
269 
270   MCDecodedPseudoProbeInlineTree() = default;
271   MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
272 
273   // Return false if it's a dummy inline site
274   bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
275 };
276 
277 /// Instances of this class represent the pseudo probes inserted into a compile
278 /// unit.
279 class MCPseudoProbeSection {
280 public:
281   void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe,
282                       const MCPseudoProbeInlineStack &InlineStack) {
283     MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack);
284   }
285 
286   // TODO: Sort by getOrdinal to ensure a determinstic section order
287   using MCProbeDivisionMap = std::map<MCSection *, MCPseudoProbeInlineTree>;
288 
289 private:
290   // A collection of MCPseudoProbe for each text section. The MCPseudoProbes
291   // are grouped by GUID of the functions where they are from and will be
292   // encoded by groups. In the comdat scenario where a text section really only
293   // contains the code of a function solely, the probes associated with a comdat
294   // function are still grouped by GUIDs due to inlining that can bring probes
295   // from different functions into one function.
296   MCProbeDivisionMap MCProbeDivisions;
297 
298 public:
299   const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; }
300 
301   bool empty() const { return MCProbeDivisions.empty(); }
302 
303   void emit(MCObjectStreamer *MCOS);
304 };
305 
306 class MCPseudoProbeTable {
307   // A collection of MCPseudoProbe in the current module grouped by text
308   // sections. MCPseudoProbes will be encoded into a corresponding
309   // .pseudoprobe section. With functions emitted as separate comdats,
310   // a text section really only contains the code of a function solely, and the
311   // probes associated with the text section will be emitted into a standalone
312   // .pseudoprobe section that shares the same comdat group with the function.
313   MCPseudoProbeSection MCProbeSections;
314 
315 public:
316   static void emit(MCObjectStreamer *MCOS);
317 
318   MCPseudoProbeSection &getProbeSections() { return MCProbeSections; }
319 
320 #ifndef NDEBUG
321   static int DdgPrintIndent;
322 #endif
323 };
324 
325 class MCPseudoProbeDecoder {
326   // GUID to PseudoProbeFuncDesc map.
327   GUIDProbeFunctionMap GUID2FuncDescMap;
328 
329   // Address to probes map.
330   AddressProbesMap Address2ProbesMap;
331 
332   // The dummy root of the inline trie, all the outlined function will directly
333   // be the children of the dummy root, all the inlined function will be the
334   // children of its inlineer. So the relation would be like:
335   // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2
336   MCDecodedPseudoProbeInlineTree DummyInlineRoot;
337 
338   /// Points to the current location in the buffer.
339   const uint8_t *Data = nullptr;
340 
341   /// Points to the end of the buffer.
342   const uint8_t *End = nullptr;
343 
344   // Decoding helper function
345   template <typename T> ErrorOr<T> readUnencodedNumber();
346   template <typename T> ErrorOr<T> readUnsignedNumber();
347   template <typename T> ErrorOr<T> readSignedNumber();
348   ErrorOr<StringRef> readString(uint32_t Size);
349 
350 public:
351   // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
352   bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);
353 
354   // Decode pseudo_probe section to build address to probes map.
355   bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size);
356 
357   // Decode pseudo_probe section to build address to probes map for specifed
358   // functions only.
359   bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
360                              std::unordered_set<uint64_t> &GuildFilter);
361 
362   bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
363                              uint64_t &LastAddr,
364                              std::unordered_set<uint64_t> &GuildFilter);
365 
366   // Print pseudo_probe_desc section info
367   void printGUID2FuncDescMap(raw_ostream &OS);
368 
369   // Print pseudo_probe section info, used along with show-disassembly
370   void printProbeForAddress(raw_ostream &OS, uint64_t Address);
371 
372   // do printProbeForAddress for all addresses
373   void printProbesForAllAddresses(raw_ostream &OS);
374 
375   // Look up the probe of a call for the input address
376   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const;
377 
378   const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
379 
380   // Helper function to populate one probe's inline stack into
381   // \p InlineContextStack.
382   // Current leaf location info will be added if IncludeLeaf is true
383   // Example:
384   //  Current probe(bar:3) inlined at foo:2 then inlined at main:1
385   //  IncludeLeaf = true,  Output: [main:1, foo:2, bar:3]
386   //  IncludeLeaf = false, Output: [main:1, foo:2]
387   void getInlineContextForProbe(
388       const MCDecodedPseudoProbe *Probe,
389       SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
390       bool IncludeLeaf) const;
391 
392   const AddressProbesMap &getAddress2ProbesMap() const {
393     return Address2ProbesMap;
394   }
395 
396   AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; }
397 
398   const GUIDProbeFunctionMap &getGUID2FuncDescMap() const {
399     return GUID2FuncDescMap;
400   }
401 
402   const MCPseudoProbeFuncDesc *
403   getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const;
404 
405   const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
406     return DummyInlineRoot;
407   }
408 };
409 
410 } // end namespace llvm
411 
412 #endif // LLVM_MC_MCPSEUDOPROBE_H
413