1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains common definitions used in the reading and writing of
11 // sample profile data.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
16 #define LLVM_PROFILEDATA_SAMPLEPROF_H
17 
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/ErrorOr.h"
27 #include "llvm/Support/MathExtras.h"
28 #include <algorithm>
29 #include <cstdint>
30 #include <map>
31 #include <string>
32 #include <system_error>
33 #include <utility>
34 
35 namespace llvm {
36 
37 class raw_ostream;
38 
39 const std::error_category &sampleprof_category();
40 
41 enum class sampleprof_error {
42   success = 0,
43   bad_magic,
44   unsupported_version,
45   too_large,
46   truncated,
47   malformed,
48   unrecognized_format,
49   unsupported_writing_format,
50   truncated_name_table,
51   not_implemented,
52   counter_overflow,
53   ostream_seek_unsupported
54 };
55 
make_error_code(sampleprof_error E)56 inline std::error_code make_error_code(sampleprof_error E) {
57   return std::error_code(static_cast<int>(E), sampleprof_category());
58 }
59 
MergeResult(sampleprof_error & Accumulator,sampleprof_error Result)60 inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
61                                     sampleprof_error Result) {
62   // Prefer first error encountered as later errors may be secondary effects of
63   // the initial problem.
64   if (Accumulator == sampleprof_error::success &&
65       Result != sampleprof_error::success)
66     Accumulator = Result;
67   return Accumulator;
68 }
69 
70 } // end namespace llvm
71 
72 namespace std {
73 
74 template <>
75 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
76 
77 } // end namespace std
78 
79 namespace llvm {
80 namespace sampleprof {
81 
82 enum SampleProfileFormat {
83   SPF_None = 0,
84   SPF_Text = 0x1,
85   SPF_Compact_Binary = 0x2,
86   SPF_GCC = 0x3,
87   SPF_Binary = 0xff
88 };
89 
90 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
91   return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
92          uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
93          uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
94          uint64_t('2') << (64 - 56) | uint64_t(Format);
95 }
96 
97 // Get the proper representation of a string in the input Format.
98 static inline StringRef getRepInFormat(StringRef Name,
99                                        SampleProfileFormat Format,
100                                        std::string &GUIDBuf) {
101   if (Name.empty())
102     return Name;
103   GUIDBuf = std::to_string(Function::getGUID(Name));
104   return (Format == SPF_Compact_Binary) ? StringRef(GUIDBuf) : Name;
105 }
106 
107 static inline uint64_t SPVersion() { return 103; }
108 
109 /// Represents the relative location of an instruction.
110 ///
111 /// Instruction locations are specified by the line offset from the
112 /// beginning of the function (marked by the line where the function
113 /// header is) and the discriminator value within that line.
114 ///
115 /// The discriminator value is useful to distinguish instructions
116 /// that are on the same line but belong to different basic blocks
117 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
118 struct LineLocation {
119   LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
120 
121   void print(raw_ostream &OS) const;
122   void dump() const;
123 
124   bool operator<(const LineLocation &O) const {
125     return LineOffset < O.LineOffset ||
126            (LineOffset == O.LineOffset && Discriminator < O.Discriminator);
127   }
128 
129   uint32_t LineOffset;
130   uint32_t Discriminator;
131 };
132 
133 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
134 
135 /// Representation of a single sample record.
136 ///
137 /// A sample record is represented by a positive integer value, which
138 /// indicates how frequently was the associated line location executed.
139 ///
140 /// Additionally, if the associated location contains a function call,
141 /// the record will hold a list of all the possible called targets. For
142 /// direct calls, this will be the exact function being invoked. For
143 /// indirect calls (function pointers, virtual table dispatch), this
144 /// will be a list of one or more functions.
145 class SampleRecord {
146 public:
147   using CallTargetMap = StringMap<uint64_t>;
148 
149   SampleRecord() = default;
150 
151   /// Increment the number of samples for this record by \p S.
152   /// Optionally scale sample count \p S by \p Weight.
153   ///
154   /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
155   /// around unsigned integers.
156   sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
157     bool Overflowed;
158     NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
159     return Overflowed ? sampleprof_error::counter_overflow
160                       : sampleprof_error::success;
161   }
162 
163   /// Add called function \p F with samples \p S.
164   /// Optionally scale sample count \p S by \p Weight.
165   ///
166   /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
167   /// around unsigned integers.
168   sampleprof_error addCalledTarget(StringRef F, uint64_t S,
169                                    uint64_t Weight = 1) {
170     uint64_t &TargetSamples = CallTargets[F];
171     bool Overflowed;
172     TargetSamples =
173         SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
174     return Overflowed ? sampleprof_error::counter_overflow
175                       : sampleprof_error::success;
176   }
177 
178   /// Return true if this sample record contains function calls.
179   bool hasCalls() const { return !CallTargets.empty(); }
180 
181   uint64_t getSamples() const { return NumSamples; }
182   const CallTargetMap &getCallTargets() const { return CallTargets; }
183 
184   /// Merge the samples in \p Other into this record.
185   /// Optionally scale sample counts by \p Weight.
186   sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
187     sampleprof_error Result = addSamples(Other.getSamples(), Weight);
188     for (const auto &I : Other.getCallTargets()) {
189       MergeResult(Result, addCalledTarget(I.first(), I.second, Weight));
190     }
191     return Result;
192   }
193 
194   void print(raw_ostream &OS, unsigned Indent) const;
195   void dump() const;
196 
197 private:
198   uint64_t NumSamples = 0;
199   CallTargetMap CallTargets;
200 };
201 
202 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
203 
204 class FunctionSamples;
205 
206 using BodySampleMap = std::map<LineLocation, SampleRecord>;
207 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
208 // memory, which is *very* significant for large profiles.
209 using FunctionSamplesMap = std::map<std::string, FunctionSamples>;
210 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
211 
212 /// Representation of the samples collected for a function.
213 ///
214 /// This data structure contains all the collected samples for the body
215 /// of a function. Each sample corresponds to a LineLocation instance
216 /// within the body of the function.
217 class FunctionSamples {
218 public:
219   FunctionSamples() = default;
220 
221   void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
222   void dump() const;
223 
224   sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
225     bool Overflowed;
226     TotalSamples =
227         SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
228     return Overflowed ? sampleprof_error::counter_overflow
229                       : sampleprof_error::success;
230   }
231 
232   sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
233     bool Overflowed;
234     TotalHeadSamples =
235         SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
236     return Overflowed ? sampleprof_error::counter_overflow
237                       : sampleprof_error::success;
238   }
239 
240   sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
241                                   uint64_t Num, uint64_t Weight = 1) {
242     return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
243         Num, Weight);
244   }
245 
246   sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
247                                           uint32_t Discriminator,
248                                           StringRef FName, uint64_t Num,
249                                           uint64_t Weight = 1) {
250     return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
251         FName, Num, Weight);
252   }
253 
254   /// Return the number of samples collected at the given location.
255   /// Each location is specified by \p LineOffset and \p Discriminator.
256   /// If the location is not found in profile, return error.
257   ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
258                                   uint32_t Discriminator) const {
259     const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
260     if (ret == BodySamples.end())
261       return std::error_code();
262     else
263       return ret->second.getSamples();
264   }
265 
266   /// Returns the call target map collected at a given location.
267   /// Each location is specified by \p LineOffset and \p Discriminator.
268   /// If the location is not found in profile, return error.
269   ErrorOr<SampleRecord::CallTargetMap>
270   findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
271     const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
272     if (ret == BodySamples.end())
273       return std::error_code();
274     return ret->second.getCallTargets();
275   }
276 
277   /// Return the function samples at the given callsite location.
278   FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
279     return CallsiteSamples[Loc];
280   }
281 
282   /// Returns the FunctionSamplesMap at the given \p Loc.
283   const FunctionSamplesMap *
284   findFunctionSamplesMapAt(const LineLocation &Loc) const {
285     auto iter = CallsiteSamples.find(Loc);
286     if (iter == CallsiteSamples.end())
287       return nullptr;
288     return &iter->second;
289   }
290 
291   /// Returns a pointer to FunctionSamples at the given callsite location \p Loc
292   /// with callee \p CalleeName. If no callsite can be found, relax the
293   /// restriction to return the FunctionSamples at callsite location \p Loc
294   /// with the maximum total sample count.
295   const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
296                                                StringRef CalleeName) const {
297     std::string CalleeGUID;
298     CalleeName = getRepInFormat(CalleeName, Format, CalleeGUID);
299 
300     auto iter = CallsiteSamples.find(Loc);
301     if (iter == CallsiteSamples.end())
302       return nullptr;
303     auto FS = iter->second.find(CalleeName);
304     if (FS != iter->second.end())
305       return &FS->second;
306     // If we cannot find exact match of the callee name, return the FS with
307     // the max total count.
308     uint64_t MaxTotalSamples = 0;
309     const FunctionSamples *R = nullptr;
310     for (const auto &NameFS : iter->second)
311       if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
312         MaxTotalSamples = NameFS.second.getTotalSamples();
313         R = &NameFS.second;
314       }
315     return R;
316   }
317 
318   bool empty() const { return TotalSamples == 0; }
319 
320   /// Return the total number of samples collected inside the function.
321   uint64_t getTotalSamples() const { return TotalSamples; }
322 
323   /// Return the total number of branch samples that have the function as the
324   /// branch target. This should be equivalent to the sample of the first
325   /// instruction of the symbol. But as we directly get this info for raw
326   /// profile without referring to potentially inaccurate debug info, this
327   /// gives more accurate profile data and is preferred for standalone symbols.
328   uint64_t getHeadSamples() const { return TotalHeadSamples; }
329 
330   /// Return the sample count of the first instruction of the function.
331   /// The function can be either a standalone symbol or an inlined function.
332   uint64_t getEntrySamples() const {
333     // Use either BodySamples or CallsiteSamples which ever has the smaller
334     // lineno.
335     if (!BodySamples.empty() &&
336         (CallsiteSamples.empty() ||
337          BodySamples.begin()->first < CallsiteSamples.begin()->first))
338       return BodySamples.begin()->second.getSamples();
339     if (!CallsiteSamples.empty()) {
340       uint64_t T = 0;
341       // An indirect callsite may be promoted to several inlined direct calls.
342       // We need to get the sum of them.
343       for (const auto &N_FS : CallsiteSamples.begin()->second)
344         T += N_FS.second.getEntrySamples();
345       return T;
346     }
347     return 0;
348   }
349 
350   /// Return all the samples collected in the body of the function.
351   const BodySampleMap &getBodySamples() const { return BodySamples; }
352 
353   /// Return all the callsite samples collected in the body of the function.
354   const CallsiteSampleMap &getCallsiteSamples() const {
355     return CallsiteSamples;
356   }
357 
358   /// Merge the samples in \p Other into this one.
359   /// Optionally scale samples by \p Weight.
360   sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
361     sampleprof_error Result = sampleprof_error::success;
362     Name = Other.getName();
363     MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
364     MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
365     for (const auto &I : Other.getBodySamples()) {
366       const LineLocation &Loc = I.first;
367       const SampleRecord &Rec = I.second;
368       MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
369     }
370     for (const auto &I : Other.getCallsiteSamples()) {
371       const LineLocation &Loc = I.first;
372       FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
373       for (const auto &Rec : I.second)
374         MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight));
375     }
376     return Result;
377   }
378 
379   /// Recursively traverses all children, if the total sample count of the
380   /// corresponding function is no less than \p Threshold, add its corresponding
381   /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
382   /// to \p S.
383   void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
384                             uint64_t Threshold) const {
385     if (TotalSamples <= Threshold)
386       return;
387     S.insert(getGUID(Name));
388     // Import hot CallTargets, which may not be available in IR because full
389     // profile annotation cannot be done until backend compilation in ThinLTO.
390     for (const auto &BS : BodySamples)
391       for (const auto &TS : BS.second.getCallTargets())
392         if (TS.getValue() > Threshold) {
393           const Function *Callee =
394               M->getFunction(getNameInModule(TS.getKey(), M));
395           if (!Callee || !Callee->getSubprogram())
396             S.insert(getGUID(TS.getKey()));
397         }
398     for (const auto &CS : CallsiteSamples)
399       for (const auto &NameFS : CS.second)
400         NameFS.second.findInlinedFunctions(S, M, Threshold);
401   }
402 
403   /// Set the name of the function.
404   void setName(StringRef FunctionName) { Name = FunctionName; }
405 
406   /// Return the function name.
407   StringRef getName() const { return Name; }
408 
409   /// Return the original function name if it exists in Module \p M.
410   StringRef getFuncNameInModule(const Module *M) const {
411     return getNameInModule(Name, M);
412   }
413 
414   /// Translate \p Name into its original name in Module.
415   /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
416   /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
417   /// is actually GUID of the original function name. getNameInModule will
418   /// translate \p Name in current FunctionSamples into its original name.
419   /// If the original name doesn't exist in \p M, return empty StringRef.
420   StringRef getNameInModule(StringRef Name, const Module *M) const {
421     if (Format != SPF_Compact_Binary)
422       return Name;
423     // Expect CurrentModule to be initialized by GUIDToFuncNameMapper.
424     if (M != CurrentModule)
425       llvm_unreachable("Input Module should be the same as CurrentModule");
426     auto iter = GUIDToFuncNameMap.find(std::stoull(Name.data()));
427     if (iter == GUIDToFuncNameMap.end())
428       return StringRef();
429     return iter->second;
430   }
431 
432   /// Returns the line offset to the start line of the subprogram.
433   /// We assume that a single function will not exceed 65535 LOC.
434   static unsigned getOffset(const DILocation *DIL);
435 
436   /// Get the FunctionSamples of the inline instance where DIL originates
437   /// from.
438   ///
439   /// The FunctionSamples of the instruction (Machine or IR) associated to
440   /// \p DIL is the inlined instance in which that instruction is coming from.
441   /// We traverse the inline stack of that instruction, and match it with the
442   /// tree nodes in the profile.
443   ///
444   /// \returns the FunctionSamples pointer to the inlined instance.
445   const FunctionSamples *findFunctionSamples(const DILocation *DIL) const;
446 
447   static SampleProfileFormat Format;
448   /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
449   /// all the function symbols defined or declared in CurrentModule.
450   static DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
451   static Module *CurrentModule;
452 
453   class GUIDToFuncNameMapper {
454   public:
455     GUIDToFuncNameMapper(Module &M) {
456       if (Format != SPF_Compact_Binary)
457         return;
458 
459       for (const auto &F : M) {
460         StringRef OrigName = F.getName();
461         GUIDToFuncNameMap.insert({Function::getGUID(OrigName), OrigName});
462         /// Local to global var promotion used by optimization like thinlto
463         /// will rename the var and add suffix like ".llvm.xxx" to the
464         /// original local name. In sample profile, the suffixes of function
465         /// names are all stripped. Since it is possible that the mapper is
466         /// built in post-thin-link phase and var promotion has been done,
467         /// we need to add the substring of function name without the suffix
468         /// into the GUIDToFuncNameMap.
469         auto pos = OrigName.find('.');
470         if (pos != StringRef::npos) {
471           StringRef NewName = OrigName.substr(0, pos);
472           GUIDToFuncNameMap.insert({Function::getGUID(NewName), NewName});
473         }
474       }
475       CurrentModule = &M;
476     }
477 
478     ~GUIDToFuncNameMapper() {
479       if (Format != SPF_Compact_Binary)
480         return;
481 
482       GUIDToFuncNameMap.clear();
483       CurrentModule = nullptr;
484     }
485   };
486 
487   // Assume the input \p Name is a name coming from FunctionSamples itself.
488   // If the format is SPF_Compact_Binary, the name is already a GUID and we
489   // don't want to return the GUID of GUID.
490   static uint64_t getGUID(StringRef Name) {
491     return (Format == SPF_Compact_Binary) ? std::stoull(Name.data())
492                                           : Function::getGUID(Name);
493   }
494 
495 private:
496   /// Mangled name of the function.
497   StringRef Name;
498 
499   /// Total number of samples collected inside this function.
500   ///
501   /// Samples are cumulative, they include all the samples collected
502   /// inside this function and all its inlined callees.
503   uint64_t TotalSamples = 0;
504 
505   /// Total number of samples collected at the head of the function.
506   /// This is an approximation of the number of calls made to this function
507   /// at runtime.
508   uint64_t TotalHeadSamples = 0;
509 
510   /// Map instruction locations to collected samples.
511   ///
512   /// Each entry in this map contains the number of samples
513   /// collected at the corresponding line offset. All line locations
514   /// are an offset from the start of the function.
515   BodySampleMap BodySamples;
516 
517   /// Map call sites to collected samples for the called function.
518   ///
519   /// Each entry in this map corresponds to all the samples
520   /// collected for the inlined function call at the given
521   /// location. For example, given:
522   ///
523   ///     void foo() {
524   ///  1    bar();
525   ///  ...
526   ///  8    baz();
527   ///     }
528   ///
529   /// If the bar() and baz() calls were inlined inside foo(), this
530   /// map will contain two entries.  One for all the samples collected
531   /// in the call to bar() at line offset 1, the other for all the samples
532   /// collected in the call to baz() at line offset 8.
533   CallsiteSampleMap CallsiteSamples;
534 };
535 
536 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
537 
538 /// Sort a LocationT->SampleT map by LocationT.
539 ///
540 /// It produces a sorted list of <LocationT, SampleT> records by ascending
541 /// order of LocationT.
542 template <class LocationT, class SampleT> class SampleSorter {
543 public:
544   using SamplesWithLoc = std::pair<const LocationT, SampleT>;
545   using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>;
546 
547   SampleSorter(const std::map<LocationT, SampleT> &Samples) {
548     for (const auto &I : Samples)
549       V.push_back(&I);
550     std::stable_sort(V.begin(), V.end(),
551                      [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
552                        return A->first < B->first;
553                      });
554   }
555 
556   const SamplesWithLocList &get() const { return V; }
557 
558 private:
559   SamplesWithLocList V;
560 };
561 
562 } // end namespace sampleprof
563 } // end namespace llvm
564 
565 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H
566