1 //===--- PseudoProbe.cpp - Pseudo probe decoding utilities  ------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "PseudoProbe.h"
10 #include "ErrorHandling.h"
11 #include "llvm/Support/Endian.h"
12 #include "llvm/Support/LEB128.h"
13 #include "llvm/Support/raw_ostream.h"
14 #include <limits>
15 #include <memory>
16 
17 using namespace llvm;
18 using namespace sampleprof;
19 using namespace support;
20 
21 namespace llvm {
22 namespace sampleprof {
23 
getProbeFNameForGUID(const GUIDProbeFunctionMap & GUID2FuncMAP,uint64_t GUID)24 static StringRef getProbeFNameForGUID(const GUIDProbeFunctionMap &GUID2FuncMAP,
25                                       uint64_t GUID) {
26   auto It = GUID2FuncMAP.find(GUID);
27   assert(It != GUID2FuncMAP.end() &&
28          "Probe function must exist for a valid GUID");
29   return It->second.FuncName;
30 }
31 
print(raw_ostream & OS)32 void PseudoProbeFuncDesc::print(raw_ostream &OS) {
33   OS << "GUID: " << FuncGUID << " Name: " << FuncName << "\n";
34   OS << "Hash: " << FuncHash << "\n";
35 }
36 
getInlineContext(SmallVectorImpl<std::string> & ContextStack,const GUIDProbeFunctionMap & GUID2FuncMAP,bool ShowName) const37 void PseudoProbe::getInlineContext(SmallVectorImpl<std::string> &ContextStack,
38                                    const GUIDProbeFunctionMap &GUID2FuncMAP,
39                                    bool ShowName) const {
40   uint32_t Begin = ContextStack.size();
41   PseudoProbeInlineTree *Cur = InlineTree;
42   // It will add the string of each node's inline site during iteration.
43   // Note that it won't include the probe's belonging function(leaf location)
44   while (Cur->hasInlineSite()) {
45     std::string ContextStr;
46     if (ShowName) {
47       StringRef FuncName =
48           getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite));
49       ContextStr += FuncName.str();
50     } else {
51       ContextStr += Twine(std::get<0>(Cur->ISite)).str();
52     }
53     ContextStr += ":";
54     ContextStr += Twine(std::get<1>(Cur->ISite)).str();
55     ContextStack.emplace_back(ContextStr);
56     Cur = Cur->Parent;
57   }
58   // Make the ContextStack in caller-callee order
59   std::reverse(ContextStack.begin() + Begin, ContextStack.end());
60 }
61 
62 std::string
getInlineContextStr(const GUIDProbeFunctionMap & GUID2FuncMAP,bool ShowName) const63 PseudoProbe::getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP,
64                                  bool ShowName) const {
65   std::ostringstream OContextStr;
66   SmallVector<std::string, 16> ContextStack;
67   getInlineContext(ContextStack, GUID2FuncMAP, ShowName);
68   for (auto &CxtStr : ContextStack) {
69     if (OContextStr.str().size())
70       OContextStr << " @ ";
71     OContextStr << CxtStr;
72   }
73   return OContextStr.str();
74 }
75 
76 static const char *PseudoProbeTypeStr[3] = {"Block", "IndirectCall",
77                                             "DirectCall"};
78 
print(raw_ostream & OS,const GUIDProbeFunctionMap & GUID2FuncMAP,bool ShowName)79 void PseudoProbe::print(raw_ostream &OS,
80                         const GUIDProbeFunctionMap &GUID2FuncMAP,
81                         bool ShowName) {
82   OS << "FUNC: ";
83   if (ShowName) {
84     StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, GUID);
85     OS << FuncName.str() << " ";
86   } else {
87     OS << GUID << " ";
88   }
89   OS << "Index: " << Index << "  ";
90   OS << "Type: " << PseudoProbeTypeStr[static_cast<uint8_t>(Type)] << "  ";
91   if (isDangling()) {
92     OS << "Dangling  ";
93   }
94   if (isTailCall()) {
95     OS << "TailCall  ";
96   }
97   std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP, ShowName);
98   if (InlineContextStr.size()) {
99     OS << "Inlined: @ ";
100     OS << InlineContextStr;
101   }
102   OS << "\n";
103 }
104 
readUnencodedNumber()105 template <typename T> T PseudoProbeDecoder::readUnencodedNumber() {
106   if (Data + sizeof(T) > End) {
107     exitWithError("Decode unencoded number error in " + SectionName +
108                   " section");
109   }
110   T Val = endian::readNext<T, little, unaligned>(Data);
111   return Val;
112 }
113 
readUnsignedNumber()114 template <typename T> T PseudoProbeDecoder::readUnsignedNumber() {
115   unsigned NumBytesRead = 0;
116   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
117   if (Val > std::numeric_limits<T>::max() || (Data + NumBytesRead > End)) {
118     exitWithError("Decode number error in " + SectionName + " section");
119   }
120   Data += NumBytesRead;
121   return static_cast<T>(Val);
122 }
123 
readSignedNumber()124 template <typename T> T PseudoProbeDecoder::readSignedNumber() {
125   unsigned NumBytesRead = 0;
126   int64_t Val = decodeSLEB128(Data, &NumBytesRead);
127   if (Val > std::numeric_limits<T>::max() || (Data + NumBytesRead > End)) {
128     exitWithError("Decode number error in " + SectionName + " section");
129   }
130   Data += NumBytesRead;
131   return static_cast<T>(Val);
132 }
133 
readString(uint32_t Size)134 StringRef PseudoProbeDecoder::readString(uint32_t Size) {
135   StringRef Str(reinterpret_cast<const char *>(Data), Size);
136   if (Data + Size > End) {
137     exitWithError("Decode string error in " + SectionName + " section");
138   }
139   Data += Size;
140   return Str;
141 }
142 
buildGUID2FuncDescMap(const uint8_t * Start,std::size_t Size)143 void PseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
144                                                std::size_t Size) {
145   // The pseudo_probe_desc section has a format like:
146   // .section .pseudo_probe_desc,"",@progbits
147   // .quad -5182264717993193164   // GUID
148   // .quad 4294967295             // Hash
149   // .uleb 3                      // Name size
150   // .ascii "foo"                 // Name
151   // .quad -2624081020897602054
152   // .quad 174696971957
153   // .uleb 34
154   // .ascii "main"
155 #ifndef NDEBUG
156   SectionName = "pseudo_probe_desc";
157 #endif
158   Data = Start;
159   End = Data + Size;
160 
161   while (Data < End) {
162     uint64_t GUID = readUnencodedNumber<uint64_t>();
163     uint64_t Hash = readUnencodedNumber<uint64_t>();
164     uint32_t NameSize = readUnsignedNumber<uint32_t>();
165     StringRef Name = FunctionSamples::getCanonicalFnName(readString(NameSize));
166 
167     // Initialize PseudoProbeFuncDesc and populate it into GUID2FuncDescMap
168     GUID2FuncDescMap.emplace(GUID, PseudoProbeFuncDesc(GUID, Hash, Name));
169   }
170   assert(Data == End && "Have unprocessed data in pseudo_probe_desc section");
171 }
172 
buildAddress2ProbeMap(const uint8_t * Start,std::size_t Size)173 void PseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
174                                                std::size_t Size) {
175   // The pseudo_probe section encodes an inline forest and each tree has a
176   // format like:
177   //  FUNCTION BODY (one for each uninlined function present in the text
178   //  section)
179   //     GUID (uint64)
180   //         GUID of the function
181   //     NPROBES (ULEB128)
182   //         Number of probes originating from this function.
183   //     NUM_INLINED_FUNCTIONS (ULEB128)
184   //         Number of callees inlined into this function, aka number of
185   //         first-level inlinees
186   //     PROBE RECORDS
187   //         A list of NPROBES entries. Each entry contains:
188   //           INDEX (ULEB128)
189   //           TYPE (uint4)
190   //             0 - block probe, 1 - indirect call, 2 - direct call
191   //           ATTRIBUTE (uint3)
192   //             1 - tail call, 2 - dangling
193   //           ADDRESS_TYPE (uint1)
194   //             0 - code address, 1 - address delta
195   //           CODE_ADDRESS (uint64 or ULEB128)
196   //             code address or address delta, depending on Flag
197   //     INLINED FUNCTION RECORDS
198   //         A list of NUM_INLINED_FUNCTIONS entries describing each of the
199   //         inlined callees.  Each record contains:
200   //           INLINE SITE
201   //             Index of the callsite probe (ULEB128)
202   //           FUNCTION BODY
203   //             A FUNCTION BODY entry describing the inlined function.
204 #ifndef NDEBUG
205   SectionName = "pseudo_probe";
206 #endif
207   Data = Start;
208   End = Data + Size;
209 
210   PseudoProbeInlineTree *Root = &DummyInlineRoot;
211   PseudoProbeInlineTree *Cur = &DummyInlineRoot;
212   uint64_t LastAddr = 0;
213   uint32_t Index = 0;
214   // A DFS-based decoding
215   while (Data < End) {
216     if (Root == Cur) {
217       // Use a sequential id for top level inliner.
218       Index = Root->getChildren().size();
219     } else {
220       // Read inline site for inlinees
221       Index = readUnsignedNumber<uint32_t>();
222     }
223     // Switch/add to a new tree node(inlinee)
224     Cur = Cur->getOrAddNode(std::make_tuple(Cur->GUID, Index));
225     // Read guid
226     Cur->GUID = readUnencodedNumber<uint64_t>();
227     // Read number of probes in the current node.
228     uint32_t NodeCount = readUnsignedNumber<uint32_t>();
229     // Read number of direct inlinees
230     Cur->ChildrenToProcess = readUnsignedNumber<uint32_t>();
231     // Read all probes in this node
232     for (std::size_t I = 0; I < NodeCount; I++) {
233       // Read index
234       uint32_t Index = readUnsignedNumber<uint32_t>();
235       // Read type | flag.
236       uint8_t Value = readUnencodedNumber<uint8_t>();
237       uint8_t Kind = Value & 0xf;
238       uint8_t Attr = (Value & 0x70) >> 4;
239       // Read address
240       uint64_t Addr = 0;
241       if (Value & 0x80) {
242         int64_t Offset = readSignedNumber<int64_t>();
243         Addr = LastAddr + Offset;
244       } else {
245         Addr = readUnencodedNumber<int64_t>();
246       }
247       // Populate Address2ProbesMap
248       auto &Probes = Address2ProbesMap[Addr];
249       Probes.emplace_back(Addr, Cur->GUID, Index, PseudoProbeType(Kind), Attr,
250                           Cur);
251       Cur->addProbes(&Probes.back());
252       LastAddr = Addr;
253     }
254 
255     // Look for the parent for the next node by subtracting the current
256     // node count from tree counts along the parent chain. The first node
257     // in the chain that has a non-zero tree count is the target.
258     while (Cur != Root) {
259       if (Cur->ChildrenToProcess == 0) {
260         Cur = Cur->Parent;
261         if (Cur != Root) {
262           assert(Cur->ChildrenToProcess > 0 &&
263                  "Should have some unprocessed nodes");
264           Cur->ChildrenToProcess -= 1;
265         }
266       } else {
267         break;
268       }
269     }
270   }
271 
272   assert(Data == End && "Have unprocessed data in pseudo_probe section");
273   assert(Cur == Root &&
274          " Cur should point to root when the forest is fully built up");
275 }
276 
printGUID2FuncDescMap(raw_ostream & OS)277 void PseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
278   OS << "Pseudo Probe Desc:\n";
279   // Make the output deterministic
280   std::map<uint64_t, PseudoProbeFuncDesc> OrderedMap(GUID2FuncDescMap.begin(),
281                                                      GUID2FuncDescMap.end());
282   for (auto &I : OrderedMap) {
283     I.second.print(OS);
284   }
285 }
286 
printProbeForAddress(raw_ostream & OS,uint64_t Address)287 void PseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
288                                               uint64_t Address) {
289   auto It = Address2ProbesMap.find(Address);
290   if (It != Address2ProbesMap.end()) {
291     for (auto &Probe : It->second) {
292       OS << " [Probe]:\t";
293       Probe.print(OS, GUID2FuncDescMap, true);
294     }
295   }
296 }
297 
298 const PseudoProbe *
getCallProbeForAddr(uint64_t Address) const299 PseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
300   auto It = Address2ProbesMap.find(Address);
301   if (It == Address2ProbesMap.end())
302     return nullptr;
303   const auto &Probes = It->second;
304 
305   const PseudoProbe *CallProbe = nullptr;
306   for (const auto &Probe : Probes) {
307     if (Probe.isCall()) {
308       assert(!CallProbe &&
309              "There should be only one call probe corresponding to address "
310              "which is a callsite.");
311       CallProbe = &Probe;
312     }
313   }
314   return CallProbe;
315 }
316 
317 const PseudoProbeFuncDesc *
getFuncDescForGUID(uint64_t GUID) const318 PseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const {
319   auto It = GUID2FuncDescMap.find(GUID);
320   assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist");
321   return &It->second;
322 }
323 
getInlineContextForProbe(const PseudoProbe * Probe,SmallVectorImpl<std::string> & InlineContextStack,bool IncludeLeaf) const324 void PseudoProbeDecoder::getInlineContextForProbe(
325     const PseudoProbe *Probe, SmallVectorImpl<std::string> &InlineContextStack,
326     bool IncludeLeaf) const {
327   Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true);
328   if (!IncludeLeaf)
329     return;
330   // Note that the context from probe doesn't include leaf frame,
331   // hence we need to retrieve and prepend leaf if requested.
332   const auto *FuncDesc = getFuncDescForGUID(Probe->GUID);
333   InlineContextStack.emplace_back(FuncDesc->FuncName + ":" +
334                                   Twine(Probe->Index).str());
335 }
336 
337 const PseudoProbeFuncDesc *
getInlinerDescForProbe(const PseudoProbe * Probe) const338 PseudoProbeDecoder::getInlinerDescForProbe(const PseudoProbe *Probe) const {
339   PseudoProbeInlineTree *InlinerNode = Probe->InlineTree;
340   if (!InlinerNode->hasInlineSite())
341     return nullptr;
342   return getFuncDescForGUID(std::get<0>(InlinerNode->ISite));
343 }
344 
345 } // end namespace sampleprof
346 } // end namespace llvm
347