1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains common definitions used in the reading and writing of
10 // sample profile data.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
15 #define LLVM_PROFILEDATA_SAMPLEPROF_H
16 
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/ErrorOr.h"
28 #include "llvm/Support/MathExtras.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cstdint>
32 #include <map>
33 #include <set>
34 #include <string>
35 #include <system_error>
36 #include <utility>
37 
38 namespace llvm {
39 
40 const std::error_category &sampleprof_category();
41 
42 enum class sampleprof_error {
43   success = 0,
44   bad_magic,
45   unsupported_version,
46   too_large,
47   truncated,
48   malformed,
49   unrecognized_format,
50   unsupported_writing_format,
51   truncated_name_table,
52   not_implemented,
53   counter_overflow,
54   ostream_seek_unsupported,
55   compress_failed,
56   uncompress_failed,
57   zlib_unavailable,
58   hash_mismatch
59 };
60 
make_error_code(sampleprof_error E)61 inline std::error_code make_error_code(sampleprof_error E) {
62   return std::error_code(static_cast<int>(E), sampleprof_category());
63 }
64 
MergeResult(sampleprof_error & Accumulator,sampleprof_error Result)65 inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
66                                     sampleprof_error Result) {
67   // Prefer first error encountered as later errors may be secondary effects of
68   // the initial problem.
69   if (Accumulator == sampleprof_error::success &&
70       Result != sampleprof_error::success)
71     Accumulator = Result;
72   return Accumulator;
73 }
74 
75 } // end namespace llvm
76 
77 namespace std {
78 
79 template <>
80 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
81 
82 } // end namespace std
83 
84 namespace llvm {
85 namespace sampleprof {
86 
87 enum SampleProfileFormat {
88   SPF_None = 0,
89   SPF_Text = 0x1,
90   SPF_Compact_Binary = 0x2,
91   SPF_GCC = 0x3,
92   SPF_Ext_Binary = 0x4,
93   SPF_Binary = 0xff
94 };
95 
96 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
97   return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
98          uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
99          uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
100          uint64_t('2') << (64 - 56) | uint64_t(Format);
101 }
102 
103 /// Get the proper representation of a string according to whether the
104 /// current Format uses MD5 to represent the string.
105 static inline StringRef getRepInFormat(StringRef Name, bool UseMD5,
106                                        std::string &GUIDBuf) {
107   if (Name.empty())
108     return Name;
109   GUIDBuf = std::to_string(Function::getGUID(Name));
110   return UseMD5 ? StringRef(GUIDBuf) : Name;
111 }
112 
113 static inline uint64_t SPVersion() { return 103; }
114 
115 // Section Type used by SampleProfileExtBinaryBaseReader and
116 // SampleProfileExtBinaryBaseWriter. Never change the existing
117 // value of enum. Only append new ones.
118 enum SecType {
119   SecInValid = 0,
120   SecProfSummary = 1,
121   SecNameTable = 2,
122   SecProfileSymbolList = 3,
123   SecFuncOffsetTable = 4,
124   SecFuncMetadata = 5,
125   // marker for the first type of profile.
126   SecFuncProfileFirst = 32,
127   SecLBRProfile = SecFuncProfileFirst
128 };
129 
130 static inline std::string getSecName(SecType Type) {
131   switch (Type) {
132   case SecInValid:
133     return "InvalidSection";
134   case SecProfSummary:
135     return "ProfileSummarySection";
136   case SecNameTable:
137     return "NameTableSection";
138   case SecProfileSymbolList:
139     return "ProfileSymbolListSection";
140   case SecFuncOffsetTable:
141     return "FuncOffsetTableSection";
142   case SecFuncMetadata:
143     return "FunctionMetadata";
144   case SecLBRProfile:
145     return "LBRProfileSection";
146   }
147   llvm_unreachable("A SecType has no name for output");
148 }
149 
150 // Entry type of section header table used by SampleProfileExtBinaryBaseReader
151 // and SampleProfileExtBinaryBaseWriter.
152 struct SecHdrTableEntry {
153   SecType Type;
154   uint64_t Flags;
155   uint64_t Offset;
156   uint64_t Size;
157   // The index indicating the location of the current entry in
158   // SectionHdrLayout table.
159   uint32_t LayoutIndex;
160 };
161 
162 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
163 // common flags will be saved in the lower 32bits and section specific flags
164 // will be saved in the higher 32 bits.
165 enum class SecCommonFlags : uint32_t {
166   SecFlagInValid = 0,
167   SecFlagCompress = (1 << 0),
168   // Indicate the section contains only profile without context.
169   SecFlagFlat = (1 << 1)
170 };
171 
172 // Section specific flags are defined here.
173 // !!!Note: Everytime a new enum class is created here, please add
174 // a new check in verifySecFlag.
175 enum class SecNameTableFlags : uint32_t {
176   SecFlagInValid = 0,
177   SecFlagMD5Name = (1 << 0),
178   // Store MD5 in fixed length instead of ULEB128 so NameTable can be
179   // accessed like an array.
180   SecFlagFixedLengthMD5 = (1 << 1),
181   // Profile contains ".__uniq." suffix name. Compiler shouldn't strip
182   // the suffix when doing profile matching when seeing the flag.
183   SecFlagUniqSuffix = (1 << 2)
184 };
185 enum class SecProfSummaryFlags : uint32_t {
186   SecFlagInValid = 0,
187   /// SecFlagPartial means the profile is for common/shared code.
188   /// The common profile is usually merged from profiles collected
189   /// from running other targets.
190   SecFlagPartial = (1 << 0),
191   /// SecFlagContext means this is context-sensitive profile for
192   /// CSSPGO
193   SecFlagFullContext = (1 << 1),
194   /// SecFlagFSDiscriminator means this profile uses flow-sensitive
195   /// discriminators.
196   SecFlagFSDiscriminator = (1 << 2)
197 };
198 
199 enum class SecFuncMetadataFlags : uint32_t {
200   SecFlagInvalid = 0,
201   SecFlagIsProbeBased = (1 << 0),
202   SecFlagHasAttribute = (1 << 1)
203 };
204 
205 // Verify section specific flag is used for the correct section.
206 template <class SecFlagType>
207 static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
208   // No verification is needed for common flags.
209   if (std::is_same<SecCommonFlags, SecFlagType>())
210     return;
211 
212   // Verification starts here for section specific flag.
213   bool IsFlagLegal = false;
214   switch (Type) {
215   case SecNameTable:
216     IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>();
217     break;
218   case SecProfSummary:
219     IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>();
220     break;
221   case SecFuncMetadata:
222     IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
223     break;
224   default:
225     break;
226   }
227   if (!IsFlagLegal)
228     llvm_unreachable("Misuse of a flag in an incompatible section");
229 }
230 
231 template <class SecFlagType>
232 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
233   verifySecFlag(Entry.Type, Flag);
234   auto FVal = static_cast<uint64_t>(Flag);
235   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
236   Entry.Flags |= IsCommon ? FVal : (FVal << 32);
237 }
238 
239 template <class SecFlagType>
240 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
241   verifySecFlag(Entry.Type, Flag);
242   auto FVal = static_cast<uint64_t>(Flag);
243   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
244   Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32));
245 }
246 
247 template <class SecFlagType>
248 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) {
249   verifySecFlag(Entry.Type, Flag);
250   auto FVal = static_cast<uint64_t>(Flag);
251   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
252   return Entry.Flags & (IsCommon ? FVal : (FVal << 32));
253 }
254 
255 /// Represents the relative location of an instruction.
256 ///
257 /// Instruction locations are specified by the line offset from the
258 /// beginning of the function (marked by the line where the function
259 /// header is) and the discriminator value within that line.
260 ///
261 /// The discriminator value is useful to distinguish instructions
262 /// that are on the same line but belong to different basic blocks
263 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
264 struct LineLocation {
265   LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
266 
267   void print(raw_ostream &OS) const;
268   void dump() const;
269 
270   bool operator<(const LineLocation &O) const {
271     return LineOffset < O.LineOffset ||
272            (LineOffset == O.LineOffset && Discriminator < O.Discriminator);
273   }
274 
275   bool operator==(const LineLocation &O) const {
276     return LineOffset == O.LineOffset && Discriminator == O.Discriminator;
277   }
278 
279   bool operator!=(const LineLocation &O) const {
280     return LineOffset != O.LineOffset || Discriminator != O.Discriminator;
281   }
282 
283   uint32_t LineOffset;
284   uint32_t Discriminator;
285 };
286 
287 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
288 
289 /// Representation of a single sample record.
290 ///
291 /// A sample record is represented by a positive integer value, which
292 /// indicates how frequently was the associated line location executed.
293 ///
294 /// Additionally, if the associated location contains a function call,
295 /// the record will hold a list of all the possible called targets. For
296 /// direct calls, this will be the exact function being invoked. For
297 /// indirect calls (function pointers, virtual table dispatch), this
298 /// will be a list of one or more functions.
299 class SampleRecord {
300 public:
301   using CallTarget = std::pair<StringRef, uint64_t>;
302   struct CallTargetComparator {
303     bool operator()(const CallTarget &LHS, const CallTarget &RHS) const {
304       if (LHS.second != RHS.second)
305         return LHS.second > RHS.second;
306 
307       return LHS.first < RHS.first;
308     }
309   };
310 
311   using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>;
312   using CallTargetMap = StringMap<uint64_t>;
313   SampleRecord() = default;
314 
315   /// Increment the number of samples for this record by \p S.
316   /// Optionally scale sample count \p S by \p Weight.
317   ///
318   /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
319   /// around unsigned integers.
320   sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
321     bool Overflowed;
322     NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
323     return Overflowed ? sampleprof_error::counter_overflow
324                       : sampleprof_error::success;
325   }
326 
327   /// Add called function \p F with samples \p S.
328   /// Optionally scale sample count \p S by \p Weight.
329   ///
330   /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
331   /// around unsigned integers.
332   sampleprof_error addCalledTarget(StringRef F, uint64_t S,
333                                    uint64_t Weight = 1) {
334     uint64_t &TargetSamples = CallTargets[F];
335     bool Overflowed;
336     TargetSamples =
337         SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
338     return Overflowed ? sampleprof_error::counter_overflow
339                       : sampleprof_error::success;
340   }
341 
342   /// Return true if this sample record contains function calls.
343   bool hasCalls() const { return !CallTargets.empty(); }
344 
345   uint64_t getSamples() const { return NumSamples; }
346   const CallTargetMap &getCallTargets() const { return CallTargets; }
347   const SortedCallTargetSet getSortedCallTargets() const {
348     return SortCallTargets(CallTargets);
349   }
350 
351   /// Sort call targets in descending order of call frequency.
352   static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
353     SortedCallTargetSet SortedTargets;
354     for (const auto &I : Targets) {
355       SortedTargets.emplace(I.first(), I.second);
356     }
357     return SortedTargets;
358   }
359 
360   /// Prorate call targets by a distribution factor.
361   static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
362                                                float DistributionFactor) {
363     CallTargetMap AdjustedTargets;
364     for (const auto &I : Targets) {
365       AdjustedTargets[I.first()] = I.second * DistributionFactor;
366     }
367     return AdjustedTargets;
368   }
369 
370   /// Merge the samples in \p Other into this record.
371   /// Optionally scale sample counts by \p Weight.
372   sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1);
373   void print(raw_ostream &OS, unsigned Indent) const;
374   void dump() const;
375 
376 private:
377   uint64_t NumSamples = 0;
378   CallTargetMap CallTargets;
379 };
380 
381 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
382 
383 // State of context associated with FunctionSamples
384 enum ContextStateMask {
385   UnknownContext = 0x0,   // Profile without context
386   RawContext = 0x1,       // Full context profile from input profile
387   SyntheticContext = 0x2, // Synthetic context created for context promotion
388   InlinedContext = 0x4,   // Profile for context that is inlined into caller
389   MergedContext = 0x8     // Profile for context merged into base profile
390 };
391 
392 // Attribute of context associated with FunctionSamples
393 enum ContextAttributeMask {
394   ContextNone = 0x0,
395   ContextWasInlined = 0x1,      // Leaf of context was inlined in previous build
396   ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
397 };
398 
399 // Sample context for FunctionSamples. It consists of the calling context,
400 // the function name and context state. Internally sample context is represented
401 // using StringRef, which is also the input for constructing a `SampleContext`.
402 // It can accept and represent both full context string as well as context-less
403 // function name.
404 // Example of full context string (note the wrapping `[]`):
405 //    `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
406 // Example of context-less function name (same as AutoFDO):
407 //    `_Z8funcLeafi`
408 class SampleContext {
409 public:
410   SampleContext() : State(UnknownContext), Attributes(ContextNone) {}
411   SampleContext(StringRef ContextStr, ContextStateMask CState = UnknownContext)
412       : Attributes(ContextNone) {
413     setContext(ContextStr, CState);
414   }
415 
416   // Promote context by removing top frames (represented by `ContextStrToRemove`).
417   // Note that with string representation of context, the promotion is effectively
418   // a substr operation with `ContextStrToRemove` removed from left.
419   void promoteOnPath(StringRef ContextStrToRemove) {
420     assert(FullContext.startswith(ContextStrToRemove));
421 
422     // Remove leading context and frame separator " @ ".
423     FullContext = FullContext.substr(ContextStrToRemove.size() + 3);
424     CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3);
425   }
426 
427   // Split the top context frame (left-most substr) from context.
428   static std::pair<StringRef, StringRef>
429   splitContextString(StringRef ContextStr) {
430     return ContextStr.split(" @ ");
431   }
432 
433   // Reconstruct a new context with the last k frames, return the context-less
434   // name if K = 1
435   StringRef getContextWithLastKFrames(uint32_t K) {
436     if (K == 1)
437       return getNameWithoutContext();
438 
439     size_t I = FullContext.size();
440     while (K--) {
441       I = FullContext.find_last_of(" @ ", I);
442       if (I == StringRef::npos)
443         return FullContext;
444       I -= 2;
445     }
446     return FullContext.slice(I + 3, StringRef::npos);
447   }
448 
449   // Decode context string for a frame to get function name and location.
450   // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
451   static void decodeContextString(StringRef ContextStr, StringRef &FName,
452                                   LineLocation &LineLoc) {
453     // Get function name
454     auto EntrySplit = ContextStr.split(':');
455     FName = EntrySplit.first;
456 
457     LineLoc = {0, 0};
458     if (!EntrySplit.second.empty()) {
459       // Get line offset, use signed int for getAsInteger so string will
460       // be parsed as signed.
461       int LineOffset = 0;
462       auto LocSplit = EntrySplit.second.split('.');
463       LocSplit.first.getAsInteger(10, LineOffset);
464       LineLoc.LineOffset = LineOffset;
465 
466       // Get discriminator
467       if (!LocSplit.second.empty())
468         LocSplit.second.getAsInteger(10, LineLoc.Discriminator);
469     }
470   }
471 
472   operator StringRef() const { return FullContext; }
473   bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; }
474   void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; }
475   uint32_t getAllAttributes() { return Attributes; }
476   void setAllAttributes(uint32_t A) { Attributes = A; }
477   bool hasState(ContextStateMask S) { return State & (uint32_t)S; }
478   void setState(ContextStateMask S) { State |= (uint32_t)S; }
479   void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
480   bool hasContext() const { return State != UnknownContext; }
481   bool isBaseContext() const { return CallingContext.empty(); }
482   StringRef getNameWithoutContext() const { return Name; }
483   StringRef getCallingContext() const { return CallingContext; }
484   StringRef getNameWithContext() const { return FullContext; }
485 
486 private:
487   // Give a context string, decode and populate internal states like
488   // Function name, Calling context and context state. Example of input
489   // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
490   void setContext(StringRef ContextStr, ContextStateMask CState) {
491     assert(!ContextStr.empty());
492     // Note that `[]` wrapped input indicates a full context string, otherwise
493     // it's treated as context-less function name only.
494     bool HasContext = ContextStr.startswith("[");
495     if (!HasContext && CState == UnknownContext) {
496       State = UnknownContext;
497       Name = FullContext = ContextStr;
498     } else {
499       // Assume raw context profile if unspecified
500       if (CState == UnknownContext)
501         State = RawContext;
502       else
503         State = CState;
504 
505       // Remove encapsulating '[' and ']' if any
506       if (HasContext)
507         FullContext = ContextStr.substr(1, ContextStr.size() - 2);
508       else
509         FullContext = ContextStr;
510 
511       // Caller is to the left of callee in context string
512       auto NameContext = FullContext.rsplit(" @ ");
513       if (NameContext.second.empty()) {
514         Name = NameContext.first;
515         CallingContext = NameContext.second;
516       } else {
517         Name = NameContext.second;
518         CallingContext = NameContext.first;
519       }
520     }
521   }
522 
523   // Full context string including calling context and leaf function name
524   StringRef FullContext;
525   // Function name for the associated sample profile
526   StringRef Name;
527   // Calling context (leaf function excluded) for the associated sample profile
528   StringRef CallingContext;
529   // State of the associated sample profile
530   uint32_t State;
531   // Attribute of the associated sample profile
532   uint32_t Attributes;
533 };
534 
535 class FunctionSamples;
536 class SampleProfileReaderItaniumRemapper;
537 
538 using BodySampleMap = std::map<LineLocation, SampleRecord>;
539 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
540 // memory, which is *very* significant for large profiles.
541 using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>;
542 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
543 
544 /// Representation of the samples collected for a function.
545 ///
546 /// This data structure contains all the collected samples for the body
547 /// of a function. Each sample corresponds to a LineLocation instance
548 /// within the body of the function.
549 class FunctionSamples {
550 public:
551   FunctionSamples() = default;
552 
553   void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
554   void dump() const;
555 
556   sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
557     bool Overflowed;
558     TotalSamples =
559         SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
560     return Overflowed ? sampleprof_error::counter_overflow
561                       : sampleprof_error::success;
562   }
563 
564   void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
565 
566   sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
567     bool Overflowed;
568     TotalHeadSamples =
569         SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
570     return Overflowed ? sampleprof_error::counter_overflow
571                       : sampleprof_error::success;
572   }
573 
574   sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
575                                   uint64_t Num, uint64_t Weight = 1) {
576     return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
577         Num, Weight);
578   }
579 
580   sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
581                                           uint32_t Discriminator,
582                                           StringRef FName, uint64_t Num,
583                                           uint64_t Weight = 1) {
584     return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
585         FName, Num, Weight);
586   }
587 
588   sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
589                                           uint64_t Weight = 1) {
590     SampleRecord S;
591     S.addSamples(Num, Weight);
592     return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
593   }
594 
595   /// Return the number of samples collected at the given location.
596   /// Each location is specified by \p LineOffset and \p Discriminator.
597   /// If the location is not found in profile, return error.
598   ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
599                                   uint32_t Discriminator) const {
600     const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
601     if (ret == BodySamples.end())
602       return std::error_code();
603     return ret->second.getSamples();
604   }
605 
606   /// Returns the call target map collected at a given location.
607   /// Each location is specified by \p LineOffset and \p Discriminator.
608   /// If the location is not found in profile, return error.
609   ErrorOr<SampleRecord::CallTargetMap>
610   findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
611     const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
612     if (ret == BodySamples.end())
613       return std::error_code();
614     return ret->second.getCallTargets();
615   }
616 
617   /// Returns the call target map collected at a given location specified by \p
618   /// CallSite. If the location is not found in profile, return error.
619   ErrorOr<SampleRecord::CallTargetMap>
620   findCallTargetMapAt(const LineLocation &CallSite) const {
621     const auto &Ret = BodySamples.find(CallSite);
622     if (Ret == BodySamples.end())
623       return std::error_code();
624     return Ret->second.getCallTargets();
625   }
626 
627   /// Return the function samples at the given callsite location.
628   FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
629     return CallsiteSamples[Loc];
630   }
631 
632   /// Returns the FunctionSamplesMap at the given \p Loc.
633   const FunctionSamplesMap *
634   findFunctionSamplesMapAt(const LineLocation &Loc) const {
635     auto iter = CallsiteSamples.find(Loc);
636     if (iter == CallsiteSamples.end())
637       return nullptr;
638     return &iter->second;
639   }
640 
641   /// Returns a pointer to FunctionSamples at the given callsite location
642   /// \p Loc with callee \p CalleeName. If no callsite can be found, relax
643   /// the restriction to return the FunctionSamples at callsite location
644   /// \p Loc with the maximum total sample count. If \p Remapper is not
645   /// nullptr, use \p Remapper to find FunctionSamples with equivalent name
646   /// as \p CalleeName.
647   const FunctionSamples *
648   findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName,
649                         SampleProfileReaderItaniumRemapper *Remapper) const;
650 
651   bool empty() const { return TotalSamples == 0; }
652 
653   /// Return the total number of samples collected inside the function.
654   uint64_t getTotalSamples() const { return TotalSamples; }
655 
656   /// Return the total number of branch samples that have the function as the
657   /// branch target. This should be equivalent to the sample of the first
658   /// instruction of the symbol. But as we directly get this info for raw
659   /// profile without referring to potentially inaccurate debug info, this
660   /// gives more accurate profile data and is preferred for standalone symbols.
661   uint64_t getHeadSamples() const { return TotalHeadSamples; }
662 
663   /// Return the sample count of the first instruction of the function.
664   /// The function can be either a standalone symbol or an inlined function.
665   uint64_t getEntrySamples() const {
666     if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
667       // For CS profile, if we already have more accurate head samples
668       // counted by branch sample from caller, use them as entry samples.
669       return getHeadSamples();
670     }
671     uint64_t Count = 0;
672     // Use either BodySamples or CallsiteSamples which ever has the smaller
673     // lineno.
674     if (!BodySamples.empty() &&
675         (CallsiteSamples.empty() ||
676          BodySamples.begin()->first < CallsiteSamples.begin()->first))
677       Count = BodySamples.begin()->second.getSamples();
678     else if (!CallsiteSamples.empty()) {
679       // An indirect callsite may be promoted to several inlined direct calls.
680       // We need to get the sum of them.
681       for (const auto &N_FS : CallsiteSamples.begin()->second)
682         Count += N_FS.second.getEntrySamples();
683     }
684     // Return at least 1 if total sample is not 0.
685     return Count ? Count : TotalSamples > 0;
686   }
687 
688   /// Return all the samples collected in the body of the function.
689   const BodySampleMap &getBodySamples() const { return BodySamples; }
690 
691   /// Return all the callsite samples collected in the body of the function.
692   const CallsiteSampleMap &getCallsiteSamples() const {
693     return CallsiteSamples;
694   }
695 
696   /// Return the maximum of sample counts in a function body including functions
697   /// inlined in it.
698   uint64_t getMaxCountInside() const {
699     uint64_t MaxCount = 0;
700     for (const auto &L : getBodySamples())
701       MaxCount = std::max(MaxCount, L.second.getSamples());
702     for (const auto &C : getCallsiteSamples())
703       for (const FunctionSamplesMap::value_type &F : C.second)
704         MaxCount = std::max(MaxCount, F.second.getMaxCountInside());
705     return MaxCount;
706   }
707 
708   /// Merge the samples in \p Other into this one.
709   /// Optionally scale samples by \p Weight.
710   sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
711     sampleprof_error Result = sampleprof_error::success;
712     Name = Other.getName();
713     if (!GUIDToFuncNameMap)
714       GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
715     if (Context.getNameWithContext().empty())
716       Context = Other.getContext();
717     if (FunctionHash == 0) {
718       // Set the function hash code for the target profile.
719       FunctionHash = Other.getFunctionHash();
720     } else if (FunctionHash != Other.getFunctionHash()) {
721       // The two profiles coming with different valid hash codes indicates
722       // either:
723       // 1. They are same-named static functions from different compilation
724       // units (without using -unique-internal-linkage-names), or
725       // 2. They are really the same function but from different compilations.
726       // Let's bail out in either case for now, which means one profile is
727       // dropped.
728       return sampleprof_error::hash_mismatch;
729     }
730 
731     MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
732     MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
733     for (const auto &I : Other.getBodySamples()) {
734       const LineLocation &Loc = I.first;
735       const SampleRecord &Rec = I.second;
736       MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
737     }
738     for (const auto &I : Other.getCallsiteSamples()) {
739       const LineLocation &Loc = I.first;
740       FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
741       for (const auto &Rec : I.second)
742         MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight));
743     }
744     return Result;
745   }
746 
747   /// Recursively traverses all children, if the total sample count of the
748   /// corresponding function is no less than \p Threshold, add its corresponding
749   /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
750   /// to \p S.
751   void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S,
752                             const StringMap<Function *> &SymbolMap,
753                             uint64_t Threshold) const {
754     if (TotalSamples <= Threshold)
755       return;
756     auto isDeclaration = [](const Function *F) {
757       return !F || F->isDeclaration();
758     };
759     if (isDeclaration(SymbolMap.lookup(getFuncName()))) {
760       // Add to the import list only when it's defined out of module.
761       S.insert(getGUID(Name));
762     }
763     // Import hot CallTargets, which may not be available in IR because full
764     // profile annotation cannot be done until backend compilation in ThinLTO.
765     for (const auto &BS : BodySamples)
766       for (const auto &TS : BS.second.getCallTargets())
767         if (TS.getValue() > Threshold) {
768           const Function *Callee = SymbolMap.lookup(getFuncName(TS.getKey()));
769           if (isDeclaration(Callee))
770             S.insert(getGUID(TS.getKey()));
771         }
772     for (const auto &CS : CallsiteSamples)
773       for (const auto &NameFS : CS.second)
774         NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold);
775   }
776 
777   /// Set the name of the function.
778   void setName(StringRef FunctionName) { Name = FunctionName; }
779 
780   /// Return the function name.
781   StringRef getName() const { return Name; }
782 
783   /// Return function name with context.
784   StringRef getNameWithContext() const {
785     return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
786   }
787 
788   /// Return the original function name.
789   StringRef getFuncName() const { return getFuncName(Name); }
790 
791   void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
792 
793   uint64_t getFunctionHash() const { return FunctionHash; }
794 
795   /// Return the canonical name for a function, taking into account
796   /// suffix elision policy attributes.
797   static StringRef getCanonicalFnName(const Function &F) {
798     auto AttrName = "sample-profile-suffix-elision-policy";
799     auto Attr = F.getFnAttribute(AttrName).getValueAsString();
800     return getCanonicalFnName(F.getName(), Attr);
801   }
802 
803   /// Name suffixes which canonicalization should handle to avoid
804   /// profile mismatch.
805   static constexpr const char *LLVMSuffix = ".llvm.";
806   static constexpr const char *PartSuffix = ".part.";
807   static constexpr const char *UniqSuffix = ".__uniq.";
808 
809   static StringRef getCanonicalFnName(StringRef FnName,
810                                       StringRef Attr = "selected") {
811     // Note the sequence of the suffixes in the knownSuffixes array matters.
812     // If suffix "A" is appended after the suffix "B", "A" should be in front
813     // of "B" in knownSuffixes.
814     const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
815     if (Attr == "" || Attr == "all") {
816       return FnName.split('.').first;
817     } else if (Attr == "selected") {
818       StringRef Cand(FnName);
819       for (const auto &Suf : knownSuffixes) {
820         StringRef Suffix(Suf);
821         // If the profile contains ".__uniq." suffix, don't strip the
822         // suffix for names in the IR.
823         if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
824           continue;
825         auto It = Cand.rfind(Suffix);
826         if (It == StringRef::npos)
827           continue;
828         auto Dit = Cand.rfind('.');
829         if (Dit == It + Suffix.size() - 1)
830           Cand = Cand.substr(0, It);
831       }
832       return Cand;
833     } else if (Attr == "none") {
834       return FnName;
835     } else {
836       assert(false && "internal error: unknown suffix elision policy");
837     }
838     return FnName;
839   }
840 
841   /// Translate \p Name into its original name.
842   /// When profile doesn't use MD5, \p Name needs no translation.
843   /// When profile uses MD5, \p Name in current FunctionSamples
844   /// is actually GUID of the original function name. getFuncName will
845   /// translate \p Name in current FunctionSamples into its original name
846   /// by looking up in the function map GUIDToFuncNameMap.
847   /// If the original name doesn't exist in the map, return empty StringRef.
848   StringRef getFuncName(StringRef Name) const {
849     if (!UseMD5)
850       return Name;
851 
852     assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
853     return GUIDToFuncNameMap->lookup(std::stoull(Name.data()));
854   }
855 
856   /// Returns the line offset to the start line of the subprogram.
857   /// We assume that a single function will not exceed 65535 LOC.
858   static unsigned getOffset(const DILocation *DIL);
859 
860   /// Returns a unique call site identifier for a given debug location of a call
861   /// instruction. This is wrapper of two scenarios, the probe-based profile and
862   /// regular profile, to hide implementation details from the sample loader and
863   /// the context tracker.
864   static LineLocation getCallSiteIdentifier(const DILocation *DIL);
865 
866   /// Get the FunctionSamples of the inline instance where DIL originates
867   /// from.
868   ///
869   /// The FunctionSamples of the instruction (Machine or IR) associated to
870   /// \p DIL is the inlined instance in which that instruction is coming from.
871   /// We traverse the inline stack of that instruction, and match it with the
872   /// tree nodes in the profile.
873   ///
874   /// \returns the FunctionSamples pointer to the inlined instance.
875   /// If \p Remapper is not nullptr, it will be used to find matching
876   /// FunctionSamples with not exactly the same but equivalent name.
877   const FunctionSamples *findFunctionSamples(
878       const DILocation *DIL,
879       SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
880 
881   static bool ProfileIsProbeBased;
882 
883   static bool ProfileIsCS;
884 
885   SampleContext &getContext() const { return Context; }
886 
887   void setContext(const SampleContext &FContext) { Context = FContext; }
888 
889   static SampleProfileFormat Format;
890 
891   /// Whether the profile uses MD5 to represent string.
892   static bool UseMD5;
893 
894   /// Whether the profile contains any ".__uniq." suffix in a name.
895   static bool HasUniqSuffix;
896 
897   /// If this profile uses flow sensitive discriminators.
898   static bool ProfileIsFS;
899 
900   /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
901   /// all the function symbols defined or declared in current module.
902   DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr;
903 
904   // Assume the input \p Name is a name coming from FunctionSamples itself.
905   // If UseMD5 is true, the name is already a GUID and we
906   // don't want to return the GUID of GUID.
907   static uint64_t getGUID(StringRef Name) {
908     return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name);
909   }
910 
911   // Find all the names in the current FunctionSamples including names in
912   // all the inline instances and names of call targets.
913   void findAllNames(DenseSet<StringRef> &NameSet) const;
914 
915 private:
916   /// Mangled name of the function.
917   StringRef Name;
918 
919   /// CFG hash value for the function.
920   uint64_t FunctionHash = 0;
921 
922   /// Calling context for function profile
923   mutable SampleContext Context;
924 
925   /// Total number of samples collected inside this function.
926   ///
927   /// Samples are cumulative, they include all the samples collected
928   /// inside this function and all its inlined callees.
929   uint64_t TotalSamples = 0;
930 
931   /// Total number of samples collected at the head of the function.
932   /// This is an approximation of the number of calls made to this function
933   /// at runtime.
934   uint64_t TotalHeadSamples = 0;
935 
936   /// Map instruction locations to collected samples.
937   ///
938   /// Each entry in this map contains the number of samples
939   /// collected at the corresponding line offset. All line locations
940   /// are an offset from the start of the function.
941   BodySampleMap BodySamples;
942 
943   /// Map call sites to collected samples for the called function.
944   ///
945   /// Each entry in this map corresponds to all the samples
946   /// collected for the inlined function call at the given
947   /// location. For example, given:
948   ///
949   ///     void foo() {
950   ///  1    bar();
951   ///  ...
952   ///  8    baz();
953   ///     }
954   ///
955   /// If the bar() and baz() calls were inlined inside foo(), this
956   /// map will contain two entries.  One for all the samples collected
957   /// in the call to bar() at line offset 1, the other for all the samples
958   /// collected in the call to baz() at line offset 8.
959   CallsiteSampleMap CallsiteSamples;
960 };
961 
962 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
963 
964 /// Sort a LocationT->SampleT map by LocationT.
965 ///
966 /// It produces a sorted list of <LocationT, SampleT> records by ascending
967 /// order of LocationT.
968 template <class LocationT, class SampleT> class SampleSorter {
969 public:
970   using SamplesWithLoc = std::pair<const LocationT, SampleT>;
971   using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>;
972 
973   SampleSorter(const std::map<LocationT, SampleT> &Samples) {
974     for (const auto &I : Samples)
975       V.push_back(&I);
976     llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
977       return A->first < B->first;
978     });
979   }
980 
981   const SamplesWithLocList &get() const { return V; }
982 
983 private:
984   SamplesWithLocList V;
985 };
986 
987 /// SampleContextTrimmer impelements helper functions to trim, merge cold
988 /// context profiles. It also supports context profile canonicalization to make
989 /// sure ProfileMap's key is consistent with FunctionSample's name/context.
990 class SampleContextTrimmer {
991 public:
992   SampleContextTrimmer(StringMap<FunctionSamples> &Profiles)
993       : ProfileMap(Profiles){};
994   // Trim and merge cold context profile when requested.
995   void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
996                                        bool TrimColdContext,
997                                        bool MergeColdContext,
998                                        uint32_t ColdContextFrameLength);
999   // Canonicalize context profile name and attributes.
1000   void canonicalizeContextProfiles();
1001 
1002 private:
1003   StringMap<FunctionSamples> &ProfileMap;
1004 };
1005 
1006 /// ProfileSymbolList records the list of function symbols shown up
1007 /// in the binary used to generate the profile. It is useful to
1008 /// to discriminate a function being so cold as not to shown up
1009 /// in the profile and a function newly added.
1010 class ProfileSymbolList {
1011 public:
1012   /// copy indicates whether we need to copy the underlying memory
1013   /// for the input Name.
1014   void add(StringRef Name, bool copy = false) {
1015     if (!copy) {
1016       Syms.insert(Name);
1017       return;
1018     }
1019     Syms.insert(Name.copy(Allocator));
1020   }
1021 
1022   bool contains(StringRef Name) { return Syms.count(Name); }
1023 
1024   void merge(const ProfileSymbolList &List) {
1025     for (auto Sym : List.Syms)
1026       add(Sym, true);
1027   }
1028 
1029   unsigned size() { return Syms.size(); }
1030 
1031   void setToCompress(bool TC) { ToCompress = TC; }
1032   bool toCompress() { return ToCompress; }
1033 
1034   std::error_code read(const uint8_t *Data, uint64_t ListSize);
1035   std::error_code write(raw_ostream &OS);
1036   void dump(raw_ostream &OS = dbgs()) const;
1037 
1038 private:
1039   // Determine whether or not to compress the symbol list when
1040   // writing it into profile. The variable is unused when the symbol
1041   // list is read from an existing profile.
1042   bool ToCompress = false;
1043   DenseSet<StringRef> Syms;
1044   BumpPtrAllocator Allocator;
1045 };
1046 
1047 } // end namespace sampleprof
1048 } // end namespace llvm
1049 
1050 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H
1051