1 //===--- PseudoProbe.cpp - Pseudo probe decoding utilities  ------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "PseudoProbe.h"
10 #include "ErrorHandling.h"
11 #include "llvm/Support/Endian.h"
12 #include "llvm/Support/LEB128.h"
13 #include "llvm/Support/raw_ostream.h"
14 #include <limits>
15 #include <memory>
16 
17 using namespace llvm;
18 using namespace sampleprof;
19 using namespace support;
20 
21 namespace llvm {
22 namespace sampleprof {
23 
getProbeFNameForGUID(const GUIDProbeFunctionMap & GUID2FuncMAP,uint64_t GUID)24 static StringRef getProbeFNameForGUID(const GUIDProbeFunctionMap &GUID2FuncMAP,
25                                       uint64_t GUID) {
26   auto It = GUID2FuncMAP.find(GUID);
27   assert(It != GUID2FuncMAP.end() &&
28          "Probe function must exist for a valid GUID");
29   return It->second.FuncName;
30 }
31 
print(raw_ostream & OS)32 void PseudoProbeFuncDesc::print(raw_ostream &OS) {
33   OS << "GUID: " << FuncGUID << " Name: " << FuncName << "\n";
34   OS << "Hash: " << FuncHash << "\n";
35 }
36 
getInlineContext(SmallVectorImpl<std::string> & ContextStack,const GUIDProbeFunctionMap & GUID2FuncMAP,bool ShowName) const37 void PseudoProbe::getInlineContext(SmallVectorImpl<std::string> &ContextStack,
38                                    const GUIDProbeFunctionMap &GUID2FuncMAP,
39                                    bool ShowName) const {
40   uint32_t Begin = ContextStack.size();
41   PseudoProbeInlineTree *Cur = InlineTree;
42   // It will add the string of each node's inline site during iteration.
43   // Note that it won't include the probe's belonging function(leaf location)
44   while (Cur->hasInlineSite()) {
45     std::string ContextStr;
46     if (ShowName) {
47       StringRef FuncName =
48           getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite));
49       ContextStr += FuncName.str();
50     } else {
51       ContextStr += Twine(std::get<0>(Cur->ISite)).str();
52     }
53     ContextStr += ":";
54     ContextStr += Twine(std::get<1>(Cur->ISite)).str();
55     ContextStack.emplace_back(ContextStr);
56     Cur = Cur->Parent;
57   }
58   // Make the ContextStack in caller-callee order
59   std::reverse(ContextStack.begin() + Begin, ContextStack.end());
60 }
61 
62 std::string
getInlineContextStr(const GUIDProbeFunctionMap & GUID2FuncMAP,bool ShowName) const63 PseudoProbe::getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP,
64                                  bool ShowName) const {
65   std::ostringstream OContextStr;
66   SmallVector<std::string, 16> ContextStack;
67   getInlineContext(ContextStack, GUID2FuncMAP, ShowName);
68   for (auto &CxtStr : ContextStack) {
69     if (OContextStr.str().size())
70       OContextStr << " @ ";
71     OContextStr << CxtStr;
72   }
73   return OContextStr.str();
74 }
75 
76 static const char *PseudoProbeTypeStr[3] = {"Block", "IndirectCall",
77                                             "DirectCall"};
78 
print(raw_ostream & OS,const GUIDProbeFunctionMap & GUID2FuncMAP,bool ShowName)79 void PseudoProbe::print(raw_ostream &OS,
80                         const GUIDProbeFunctionMap &GUID2FuncMAP,
81                         bool ShowName) {
82   OS << "FUNC: ";
83   if (ShowName) {
84     StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, GUID);
85     OS << FuncName.str() << " ";
86   } else {
87     OS << GUID << " ";
88   }
89   OS << "Index: " << Index << "  ";
90   OS << "Type: " << PseudoProbeTypeStr[static_cast<uint8_t>(Type)] << "  ";
91 
92   std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP, ShowName);
93   if (InlineContextStr.size()) {
94     OS << "Inlined: @ ";
95     OS << InlineContextStr;
96   }
97   OS << "\n";
98 }
99 
readUnencodedNumber()100 template <typename T> T PseudoProbeDecoder::readUnencodedNumber() {
101   if (Data + sizeof(T) > End) {
102     exitWithError("Decode unencoded number error in " + SectionName +
103                   " section");
104   }
105   T Val = endian::readNext<T, little, unaligned>(Data);
106   return Val;
107 }
108 
readUnsignedNumber()109 template <typename T> T PseudoProbeDecoder::readUnsignedNumber() {
110   unsigned NumBytesRead = 0;
111   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
112   if (Val > std::numeric_limits<T>::max() || (Data + NumBytesRead > End)) {
113     exitWithError("Decode number error in " + SectionName + " section");
114   }
115   Data += NumBytesRead;
116   return static_cast<T>(Val);
117 }
118 
readSignedNumber()119 template <typename T> T PseudoProbeDecoder::readSignedNumber() {
120   unsigned NumBytesRead = 0;
121   int64_t Val = decodeSLEB128(Data, &NumBytesRead);
122   if (Val > std::numeric_limits<T>::max() || (Data + NumBytesRead > End)) {
123     exitWithError("Decode number error in " + SectionName + " section");
124   }
125   Data += NumBytesRead;
126   return static_cast<T>(Val);
127 }
128 
readString(uint32_t Size)129 StringRef PseudoProbeDecoder::readString(uint32_t Size) {
130   StringRef Str(reinterpret_cast<const char *>(Data), Size);
131   if (Data + Size > End) {
132     exitWithError("Decode string error in " + SectionName + " section");
133   }
134   Data += Size;
135   return Str;
136 }
137 
buildGUID2FuncDescMap(const uint8_t * Start,std::size_t Size)138 void PseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
139                                                std::size_t Size) {
140   // The pseudo_probe_desc section has a format like:
141   // .section .pseudo_probe_desc,"",@progbits
142   // .quad -5182264717993193164   // GUID
143   // .quad 4294967295             // Hash
144   // .uleb 3                      // Name size
145   // .ascii "foo"                 // Name
146   // .quad -2624081020897602054
147   // .quad 174696971957
148   // .uleb 34
149   // .ascii "main"
150 #ifndef NDEBUG
151   SectionName = "pseudo_probe_desc";
152 #endif
153   Data = Start;
154   End = Data + Size;
155 
156   while (Data < End) {
157     uint64_t GUID = readUnencodedNumber<uint64_t>();
158     uint64_t Hash = readUnencodedNumber<uint64_t>();
159     uint32_t NameSize = readUnsignedNumber<uint32_t>();
160     StringRef Name = FunctionSamples::getCanonicalFnName(readString(NameSize));
161 
162     // Initialize PseudoProbeFuncDesc and populate it into GUID2FuncDescMap
163     GUID2FuncDescMap.emplace(GUID, PseudoProbeFuncDesc(GUID, Hash, Name));
164   }
165   assert(Data == End && "Have unprocessed data in pseudo_probe_desc section");
166 }
167 
buildAddress2ProbeMap(const uint8_t * Start,std::size_t Size)168 void PseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
169                                                std::size_t Size) {
170   // The pseudo_probe section encodes an inline forest and each tree has a
171   // format like:
172   //  FUNCTION BODY (one for each uninlined function present in the text
173   //  section)
174   //     GUID (uint64)
175   //         GUID of the function
176   //     NPROBES (ULEB128)
177   //         Number of probes originating from this function.
178   //     NUM_INLINED_FUNCTIONS (ULEB128)
179   //         Number of callees inlined into this function, aka number of
180   //         first-level inlinees
181   //     PROBE RECORDS
182   //         A list of NPROBES entries. Each entry contains:
183   //           INDEX (ULEB128)
184   //           TYPE (uint4)
185   //             0 - block probe, 1 - indirect call, 2 - direct call
186   //           ATTRIBUTE (uint3)
187   //             1 - reserved
188   //           ADDRESS_TYPE (uint1)
189   //             0 - code address, 1 - address delta
190   //           CODE_ADDRESS (uint64 or ULEB128)
191   //             code address or address delta, depending on Flag
192   //     INLINED FUNCTION RECORDS
193   //         A list of NUM_INLINED_FUNCTIONS entries describing each of the
194   //         inlined callees.  Each record contains:
195   //           INLINE SITE
196   //             Index of the callsite probe (ULEB128)
197   //           FUNCTION BODY
198   //             A FUNCTION BODY entry describing the inlined function.
199 #ifndef NDEBUG
200   SectionName = "pseudo_probe";
201 #endif
202   Data = Start;
203   End = Data + Size;
204 
205   PseudoProbeInlineTree *Root = &DummyInlineRoot;
206   PseudoProbeInlineTree *Cur = &DummyInlineRoot;
207   uint64_t LastAddr = 0;
208   uint32_t Index = 0;
209   // A DFS-based decoding
210   while (Data < End) {
211     if (Root == Cur) {
212       // Use a sequential id for top level inliner.
213       Index = Root->getChildren().size();
214     } else {
215       // Read inline site for inlinees
216       Index = readUnsignedNumber<uint32_t>();
217     }
218     // Switch/add to a new tree node(inlinee)
219     Cur = Cur->getOrAddNode(std::make_tuple(Cur->GUID, Index));
220     // Read guid
221     Cur->GUID = readUnencodedNumber<uint64_t>();
222     // Read number of probes in the current node.
223     uint32_t NodeCount = readUnsignedNumber<uint32_t>();
224     // Read number of direct inlinees
225     Cur->ChildrenToProcess = readUnsignedNumber<uint32_t>();
226     // Read all probes in this node
227     for (std::size_t I = 0; I < NodeCount; I++) {
228       // Read index
229       uint32_t Index = readUnsignedNumber<uint32_t>();
230       // Read type | flag.
231       uint8_t Value = readUnencodedNumber<uint8_t>();
232       uint8_t Kind = Value & 0xf;
233       uint8_t Attr = (Value & 0x70) >> 4;
234       // Read address
235       uint64_t Addr = 0;
236       if (Value & 0x80) {
237         int64_t Offset = readSignedNumber<int64_t>();
238         Addr = LastAddr + Offset;
239       } else {
240         Addr = readUnencodedNumber<int64_t>();
241       }
242       // Populate Address2ProbesMap
243       auto &Probes = Address2ProbesMap[Addr];
244       Probes.emplace_back(Addr, Cur->GUID, Index, PseudoProbeType(Kind), Attr,
245                           Cur);
246       Cur->addProbes(&Probes.back());
247       LastAddr = Addr;
248     }
249 
250     // Look for the parent for the next node by subtracting the current
251     // node count from tree counts along the parent chain. The first node
252     // in the chain that has a non-zero tree count is the target.
253     while (Cur != Root) {
254       if (Cur->ChildrenToProcess == 0) {
255         Cur = Cur->Parent;
256         if (Cur != Root) {
257           assert(Cur->ChildrenToProcess > 0 &&
258                  "Should have some unprocessed nodes");
259           Cur->ChildrenToProcess -= 1;
260         }
261       } else {
262         break;
263       }
264     }
265   }
266 
267   assert(Data == End && "Have unprocessed data in pseudo_probe section");
268   assert(Cur == Root &&
269          " Cur should point to root when the forest is fully built up");
270 }
271 
printGUID2FuncDescMap(raw_ostream & OS)272 void PseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
273   OS << "Pseudo Probe Desc:\n";
274   // Make the output deterministic
275   std::map<uint64_t, PseudoProbeFuncDesc> OrderedMap(GUID2FuncDescMap.begin(),
276                                                      GUID2FuncDescMap.end());
277   for (auto &I : OrderedMap) {
278     I.second.print(OS);
279   }
280 }
281 
printProbeForAddress(raw_ostream & OS,uint64_t Address)282 void PseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
283                                               uint64_t Address) {
284   auto It = Address2ProbesMap.find(Address);
285   if (It != Address2ProbesMap.end()) {
286     for (auto &Probe : It->second) {
287       OS << " [Probe]:\t";
288       Probe.print(OS, GUID2FuncDescMap, true);
289     }
290   }
291 }
292 
293 const PseudoProbe *
getCallProbeForAddr(uint64_t Address) const294 PseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
295   auto It = Address2ProbesMap.find(Address);
296   if (It == Address2ProbesMap.end())
297     return nullptr;
298   const auto &Probes = It->second;
299 
300   const PseudoProbe *CallProbe = nullptr;
301   for (const auto &Probe : Probes) {
302     if (Probe.isCall()) {
303       assert(!CallProbe &&
304              "There should be only one call probe corresponding to address "
305              "which is a callsite.");
306       CallProbe = &Probe;
307     }
308   }
309   return CallProbe;
310 }
311 
312 const PseudoProbeFuncDesc *
getFuncDescForGUID(uint64_t GUID) const313 PseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const {
314   auto It = GUID2FuncDescMap.find(GUID);
315   assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist");
316   return &It->second;
317 }
318 
getInlineContextForProbe(const PseudoProbe * Probe,SmallVectorImpl<std::string> & InlineContextStack,bool IncludeLeaf) const319 void PseudoProbeDecoder::getInlineContextForProbe(
320     const PseudoProbe *Probe, SmallVectorImpl<std::string> &InlineContextStack,
321     bool IncludeLeaf) const {
322   Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true);
323   if (!IncludeLeaf)
324     return;
325   // Note that the context from probe doesn't include leaf frame,
326   // hence we need to retrieve and prepend leaf if requested.
327   const auto *FuncDesc = getFuncDescForGUID(Probe->GUID);
328   InlineContextStack.emplace_back(FuncDesc->FuncName + ":" +
329                                   Twine(Probe->Index).str());
330 }
331 
332 const PseudoProbeFuncDesc *
getInlinerDescForProbe(const PseudoProbe * Probe) const333 PseudoProbeDecoder::getInlinerDescForProbe(const PseudoProbe *Probe) const {
334   PseudoProbeInlineTree *InlinerNode = Probe->InlineTree;
335   if (!InlinerNode->hasInlineSite())
336     return nullptr;
337   return getFuncDescForGUID(std::get<0>(InlinerNode->ISite));
338 }
339 
340 } // end namespace sampleprof
341 } // end namespace llvm
342