1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains common definitions used in the reading and writing of
10 // sample profile data.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
15 #define LLVM_PROFILEDATA_SAMPLEPROF_H
16
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/ErrorOr.h"
28 #include "llvm/Support/MathExtras.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cstdint>
32 #include <map>
33 #include <set>
34 #include <string>
35 #include <system_error>
36 #include <utility>
37
38 namespace llvm {
39
40 const std::error_category &sampleprof_category();
41
42 enum class sampleprof_error {
43 success = 0,
44 bad_magic,
45 unsupported_version,
46 too_large,
47 truncated,
48 malformed,
49 unrecognized_format,
50 unsupported_writing_format,
51 truncated_name_table,
52 not_implemented,
53 counter_overflow,
54 ostream_seek_unsupported,
55 compress_failed,
56 uncompress_failed,
57 zlib_unavailable,
58 hash_mismatch
59 };
60
make_error_code(sampleprof_error E)61 inline std::error_code make_error_code(sampleprof_error E) {
62 return std::error_code(static_cast<int>(E), sampleprof_category());
63 }
64
MergeResult(sampleprof_error & Accumulator,sampleprof_error Result)65 inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
66 sampleprof_error Result) {
67 // Prefer first error encountered as later errors may be secondary effects of
68 // the initial problem.
69 if (Accumulator == sampleprof_error::success &&
70 Result != sampleprof_error::success)
71 Accumulator = Result;
72 return Accumulator;
73 }
74
75 } // end namespace llvm
76
77 namespace std {
78
79 template <>
80 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
81
82 } // end namespace std
83
84 namespace llvm {
85 namespace sampleprof {
86
87 enum SampleProfileFormat {
88 SPF_None = 0,
89 SPF_Text = 0x1,
90 SPF_Compact_Binary = 0x2,
91 SPF_GCC = 0x3,
92 SPF_Ext_Binary = 0x4,
93 SPF_Binary = 0xff
94 };
95
96 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
97 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
98 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
99 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
100 uint64_t('2') << (64 - 56) | uint64_t(Format);
101 }
102
103 /// Get the proper representation of a string according to whether the
104 /// current Format uses MD5 to represent the string.
105 static inline StringRef getRepInFormat(StringRef Name, bool UseMD5,
106 std::string &GUIDBuf) {
107 if (Name.empty())
108 return Name;
109 GUIDBuf = std::to_string(Function::getGUID(Name));
110 return UseMD5 ? StringRef(GUIDBuf) : Name;
111 }
112
113 static inline uint64_t SPVersion() { return 103; }
114
115 // Section Type used by SampleProfileExtBinaryBaseReader and
116 // SampleProfileExtBinaryBaseWriter. Never change the existing
117 // value of enum. Only append new ones.
118 enum SecType {
119 SecInValid = 0,
120 SecProfSummary = 1,
121 SecNameTable = 2,
122 SecProfileSymbolList = 3,
123 SecFuncOffsetTable = 4,
124 SecFuncMetadata = 5,
125 // marker for the first type of profile.
126 SecFuncProfileFirst = 32,
127 SecLBRProfile = SecFuncProfileFirst
128 };
129
130 static inline std::string getSecName(SecType Type) {
131 switch (Type) {
132 case SecInValid:
133 return "InvalidSection";
134 case SecProfSummary:
135 return "ProfileSummarySection";
136 case SecNameTable:
137 return "NameTableSection";
138 case SecProfileSymbolList:
139 return "ProfileSymbolListSection";
140 case SecFuncOffsetTable:
141 return "FuncOffsetTableSection";
142 case SecFuncMetadata:
143 return "FunctionMetadata";
144 case SecLBRProfile:
145 return "LBRProfileSection";
146 }
147 llvm_unreachable("A SecType has no name for output");
148 }
149
150 // Entry type of section header table used by SampleProfileExtBinaryBaseReader
151 // and SampleProfileExtBinaryBaseWriter.
152 struct SecHdrTableEntry {
153 SecType Type;
154 uint64_t Flags;
155 uint64_t Offset;
156 uint64_t Size;
157 // The index indicating the location of the current entry in
158 // SectionHdrLayout table.
159 uint32_t LayoutIndex;
160 };
161
162 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
163 // common flags will be saved in the lower 32bits and section specific flags
164 // will be saved in the higher 32 bits.
165 enum class SecCommonFlags : uint32_t {
166 SecFlagInValid = 0,
167 SecFlagCompress = (1 << 0),
168 // Indicate the section contains only profile without context.
169 SecFlagFlat = (1 << 1)
170 };
171
172 // Section specific flags are defined here.
173 // !!!Note: Everytime a new enum class is created here, please add
174 // a new check in verifySecFlag.
175 enum class SecNameTableFlags : uint32_t {
176 SecFlagInValid = 0,
177 SecFlagMD5Name = (1 << 0),
178 // Store MD5 in fixed length instead of ULEB128 so NameTable can be
179 // accessed like an array.
180 SecFlagFixedLengthMD5 = (1 << 1),
181 // Profile contains ".__uniq." suffix name. Compiler shouldn't strip
182 // the suffix when doing profile matching when seeing the flag.
183 SecFlagUniqSuffix = (1 << 2)
184 };
185 enum class SecProfSummaryFlags : uint32_t {
186 SecFlagInValid = 0,
187 /// SecFlagPartial means the profile is for common/shared code.
188 /// The common profile is usually merged from profiles collected
189 /// from running other targets.
190 SecFlagPartial = (1 << 0),
191 /// SecFlagContext means this is context-sensitive profile for
192 /// CSSPGO
193 SecFlagFullContext = (1 << 1),
194 /// SecFlagFSDiscriminator means this profile uses flow-sensitive
195 /// discriminators.
196 SecFlagFSDiscriminator = (1 << 2)
197 };
198
199 enum class SecFuncMetadataFlags : uint32_t {
200 SecFlagInvalid = 0,
201 SecFlagIsProbeBased = (1 << 0),
202 SecFlagHasAttribute = (1 << 1)
203 };
204
205 // Verify section specific flag is used for the correct section.
206 template <class SecFlagType>
207 static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
208 // No verification is needed for common flags.
209 if (std::is_same<SecCommonFlags, SecFlagType>())
210 return;
211
212 // Verification starts here for section specific flag.
213 bool IsFlagLegal = false;
214 switch (Type) {
215 case SecNameTable:
216 IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>();
217 break;
218 case SecProfSummary:
219 IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>();
220 break;
221 case SecFuncMetadata:
222 IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
223 break;
224 default:
225 break;
226 }
227 if (!IsFlagLegal)
228 llvm_unreachable("Misuse of a flag in an incompatible section");
229 }
230
231 template <class SecFlagType>
232 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
233 verifySecFlag(Entry.Type, Flag);
234 auto FVal = static_cast<uint64_t>(Flag);
235 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
236 Entry.Flags |= IsCommon ? FVal : (FVal << 32);
237 }
238
239 template <class SecFlagType>
240 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
241 verifySecFlag(Entry.Type, Flag);
242 auto FVal = static_cast<uint64_t>(Flag);
243 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
244 Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32));
245 }
246
247 template <class SecFlagType>
248 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) {
249 verifySecFlag(Entry.Type, Flag);
250 auto FVal = static_cast<uint64_t>(Flag);
251 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
252 return Entry.Flags & (IsCommon ? FVal : (FVal << 32));
253 }
254
255 /// Represents the relative location of an instruction.
256 ///
257 /// Instruction locations are specified by the line offset from the
258 /// beginning of the function (marked by the line where the function
259 /// header is) and the discriminator value within that line.
260 ///
261 /// The discriminator value is useful to distinguish instructions
262 /// that are on the same line but belong to different basic blocks
263 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
264 struct LineLocation {
265 LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
266
267 void print(raw_ostream &OS) const;
268 void dump() const;
269
270 bool operator<(const LineLocation &O) const {
271 return LineOffset < O.LineOffset ||
272 (LineOffset == O.LineOffset && Discriminator < O.Discriminator);
273 }
274
275 bool operator==(const LineLocation &O) const {
276 return LineOffset == O.LineOffset && Discriminator == O.Discriminator;
277 }
278
279 bool operator!=(const LineLocation &O) const {
280 return LineOffset != O.LineOffset || Discriminator != O.Discriminator;
281 }
282
283 uint32_t LineOffset;
284 uint32_t Discriminator;
285 };
286
287 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
288
289 /// Representation of a single sample record.
290 ///
291 /// A sample record is represented by a positive integer value, which
292 /// indicates how frequently was the associated line location executed.
293 ///
294 /// Additionally, if the associated location contains a function call,
295 /// the record will hold a list of all the possible called targets. For
296 /// direct calls, this will be the exact function being invoked. For
297 /// indirect calls (function pointers, virtual table dispatch), this
298 /// will be a list of one or more functions.
299 class SampleRecord {
300 public:
301 using CallTarget = std::pair<StringRef, uint64_t>;
302 struct CallTargetComparator {
303 bool operator()(const CallTarget &LHS, const CallTarget &RHS) const {
304 if (LHS.second != RHS.second)
305 return LHS.second > RHS.second;
306
307 return LHS.first < RHS.first;
308 }
309 };
310
311 using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>;
312 using CallTargetMap = StringMap<uint64_t>;
313 SampleRecord() = default;
314
315 /// Increment the number of samples for this record by \p S.
316 /// Optionally scale sample count \p S by \p Weight.
317 ///
318 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
319 /// around unsigned integers.
320 sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
321 bool Overflowed;
322 NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
323 return Overflowed ? sampleprof_error::counter_overflow
324 : sampleprof_error::success;
325 }
326
327 /// Add called function \p F with samples \p S.
328 /// Optionally scale sample count \p S by \p Weight.
329 ///
330 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
331 /// around unsigned integers.
332 sampleprof_error addCalledTarget(StringRef F, uint64_t S,
333 uint64_t Weight = 1) {
334 uint64_t &TargetSamples = CallTargets[F];
335 bool Overflowed;
336 TargetSamples =
337 SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
338 return Overflowed ? sampleprof_error::counter_overflow
339 : sampleprof_error::success;
340 }
341
342 /// Return true if this sample record contains function calls.
343 bool hasCalls() const { return !CallTargets.empty(); }
344
345 uint64_t getSamples() const { return NumSamples; }
346 const CallTargetMap &getCallTargets() const { return CallTargets; }
347 const SortedCallTargetSet getSortedCallTargets() const {
348 return SortCallTargets(CallTargets);
349 }
350
351 /// Sort call targets in descending order of call frequency.
352 static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
353 SortedCallTargetSet SortedTargets;
354 for (const auto &I : Targets) {
355 SortedTargets.emplace(I.first(), I.second);
356 }
357 return SortedTargets;
358 }
359
360 /// Prorate call targets by a distribution factor.
361 static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
362 float DistributionFactor) {
363 CallTargetMap AdjustedTargets;
364 for (const auto &I : Targets) {
365 AdjustedTargets[I.first()] = I.second * DistributionFactor;
366 }
367 return AdjustedTargets;
368 }
369
370 /// Merge the samples in \p Other into this record.
371 /// Optionally scale sample counts by \p Weight.
372 sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1);
373 void print(raw_ostream &OS, unsigned Indent) const;
374 void dump() const;
375
376 private:
377 uint64_t NumSamples = 0;
378 CallTargetMap CallTargets;
379 };
380
381 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
382
383 // State of context associated with FunctionSamples
384 enum ContextStateMask {
385 UnknownContext = 0x0, // Profile without context
386 RawContext = 0x1, // Full context profile from input profile
387 SyntheticContext = 0x2, // Synthetic context created for context promotion
388 InlinedContext = 0x4, // Profile for context that is inlined into caller
389 MergedContext = 0x8 // Profile for context merged into base profile
390 };
391
392 // Attribute of context associated with FunctionSamples
393 enum ContextAttributeMask {
394 ContextNone = 0x0,
395 ContextWasInlined = 0x1, // Leaf of context was inlined in previous build
396 ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
397 };
398
399 // Sample context for FunctionSamples. It consists of the calling context,
400 // the function name and context state. Internally sample context is represented
401 // using StringRef, which is also the input for constructing a `SampleContext`.
402 // It can accept and represent both full context string as well as context-less
403 // function name.
404 // Example of full context string (note the wrapping `[]`):
405 // `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
406 // Example of context-less function name (same as AutoFDO):
407 // `_Z8funcLeafi`
408 class SampleContext {
409 public:
410 SampleContext() : State(UnknownContext), Attributes(ContextNone) {}
411 SampleContext(StringRef ContextStr, ContextStateMask CState = UnknownContext)
412 : Attributes(ContextNone) {
413 setContext(ContextStr, CState);
414 }
415
416 // Promote context by removing top frames (represented by `ContextStrToRemove`).
417 // Note that with string representation of context, the promotion is effectively
418 // a substr operation with `ContextStrToRemove` removed from left.
419 void promoteOnPath(StringRef ContextStrToRemove) {
420 assert(FullContext.startswith(ContextStrToRemove));
421
422 // Remove leading context and frame separator " @ ".
423 FullContext = FullContext.substr(ContextStrToRemove.size() + 3);
424 CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3);
425 }
426
427 // Split the top context frame (left-most substr) from context.
428 static std::pair<StringRef, StringRef>
429 splitContextString(StringRef ContextStr) {
430 return ContextStr.split(" @ ");
431 }
432
433 // Reconstruct a new context with the last k frames, return the context-less
434 // name if K = 1
435 StringRef getContextWithLastKFrames(uint32_t K) {
436 if (K == 1)
437 return getNameWithoutContext();
438
439 size_t I = FullContext.size();
440 while (K--) {
441 I = FullContext.find_last_of(" @ ", I);
442 if (I == StringRef::npos)
443 return FullContext;
444 I -= 2;
445 }
446 return FullContext.slice(I + 3, StringRef::npos);
447 }
448
449 // Decode context string for a frame to get function name and location.
450 // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
451 static void decodeContextString(StringRef ContextStr, StringRef &FName,
452 LineLocation &LineLoc) {
453 // Get function name
454 auto EntrySplit = ContextStr.split(':');
455 FName = EntrySplit.first;
456
457 LineLoc = {0, 0};
458 if (!EntrySplit.second.empty()) {
459 // Get line offset, use signed int for getAsInteger so string will
460 // be parsed as signed.
461 int LineOffset = 0;
462 auto LocSplit = EntrySplit.second.split('.');
463 LocSplit.first.getAsInteger(10, LineOffset);
464 LineLoc.LineOffset = LineOffset;
465
466 // Get discriminator
467 if (!LocSplit.second.empty())
468 LocSplit.second.getAsInteger(10, LineLoc.Discriminator);
469 }
470 }
471
472 operator StringRef() const { return FullContext; }
473 bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; }
474 void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; }
475 uint32_t getAllAttributes() { return Attributes; }
476 void setAllAttributes(uint32_t A) { Attributes = A; }
477 bool hasState(ContextStateMask S) { return State & (uint32_t)S; }
478 void setState(ContextStateMask S) { State |= (uint32_t)S; }
479 void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
480 bool hasContext() const { return State != UnknownContext; }
481 bool isBaseContext() const { return CallingContext.empty(); }
482 StringRef getNameWithoutContext() const { return Name; }
483 StringRef getCallingContext() const { return CallingContext; }
484 StringRef getNameWithContext() const { return FullContext; }
485
486 private:
487 // Give a context string, decode and populate internal states like
488 // Function name, Calling context and context state. Example of input
489 // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
490 void setContext(StringRef ContextStr, ContextStateMask CState) {
491 assert(!ContextStr.empty());
492 // Note that `[]` wrapped input indicates a full context string, otherwise
493 // it's treated as context-less function name only.
494 bool HasContext = ContextStr.startswith("[");
495 if (!HasContext && CState == UnknownContext) {
496 State = UnknownContext;
497 Name = FullContext = ContextStr;
498 } else {
499 // Assume raw context profile if unspecified
500 if (CState == UnknownContext)
501 State = RawContext;
502 else
503 State = CState;
504
505 // Remove encapsulating '[' and ']' if any
506 if (HasContext)
507 FullContext = ContextStr.substr(1, ContextStr.size() - 2);
508 else
509 FullContext = ContextStr;
510
511 // Caller is to the left of callee in context string
512 auto NameContext = FullContext.rsplit(" @ ");
513 if (NameContext.second.empty()) {
514 Name = NameContext.first;
515 CallingContext = NameContext.second;
516 } else {
517 Name = NameContext.second;
518 CallingContext = NameContext.first;
519 }
520 }
521 }
522
523 // Full context string including calling context and leaf function name
524 StringRef FullContext;
525 // Function name for the associated sample profile
526 StringRef Name;
527 // Calling context (leaf function excluded) for the associated sample profile
528 StringRef CallingContext;
529 // State of the associated sample profile
530 uint32_t State;
531 // Attribute of the associated sample profile
532 uint32_t Attributes;
533 };
534
535 class FunctionSamples;
536 class SampleProfileReaderItaniumRemapper;
537
538 using BodySampleMap = std::map<LineLocation, SampleRecord>;
539 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
540 // memory, which is *very* significant for large profiles.
541 using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>;
542 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
543
544 /// Representation of the samples collected for a function.
545 ///
546 /// This data structure contains all the collected samples for the body
547 /// of a function. Each sample corresponds to a LineLocation instance
548 /// within the body of the function.
549 class FunctionSamples {
550 public:
551 FunctionSamples() = default;
552
553 void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
554 void dump() const;
555
556 sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
557 bool Overflowed;
558 TotalSamples =
559 SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
560 return Overflowed ? sampleprof_error::counter_overflow
561 : sampleprof_error::success;
562 }
563
564 void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
565
566 sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
567 bool Overflowed;
568 TotalHeadSamples =
569 SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
570 return Overflowed ? sampleprof_error::counter_overflow
571 : sampleprof_error::success;
572 }
573
574 sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
575 uint64_t Num, uint64_t Weight = 1) {
576 return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
577 Num, Weight);
578 }
579
580 sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
581 uint32_t Discriminator,
582 StringRef FName, uint64_t Num,
583 uint64_t Weight = 1) {
584 return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
585 FName, Num, Weight);
586 }
587
588 sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
589 uint64_t Weight = 1) {
590 SampleRecord S;
591 S.addSamples(Num, Weight);
592 return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
593 }
594
595 /// Return the number of samples collected at the given location.
596 /// Each location is specified by \p LineOffset and \p Discriminator.
597 /// If the location is not found in profile, return error.
598 ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
599 uint32_t Discriminator) const {
600 const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
601 if (ret == BodySamples.end())
602 return std::error_code();
603 return ret->second.getSamples();
604 }
605
606 /// Returns the call target map collected at a given location.
607 /// Each location is specified by \p LineOffset and \p Discriminator.
608 /// If the location is not found in profile, return error.
609 ErrorOr<SampleRecord::CallTargetMap>
610 findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
611 const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
612 if (ret == BodySamples.end())
613 return std::error_code();
614 return ret->second.getCallTargets();
615 }
616
617 /// Returns the call target map collected at a given location specified by \p
618 /// CallSite. If the location is not found in profile, return error.
619 ErrorOr<SampleRecord::CallTargetMap>
620 findCallTargetMapAt(const LineLocation &CallSite) const {
621 const auto &Ret = BodySamples.find(CallSite);
622 if (Ret == BodySamples.end())
623 return std::error_code();
624 return Ret->second.getCallTargets();
625 }
626
627 /// Return the function samples at the given callsite location.
628 FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
629 return CallsiteSamples[Loc];
630 }
631
632 /// Returns the FunctionSamplesMap at the given \p Loc.
633 const FunctionSamplesMap *
634 findFunctionSamplesMapAt(const LineLocation &Loc) const {
635 auto iter = CallsiteSamples.find(Loc);
636 if (iter == CallsiteSamples.end())
637 return nullptr;
638 return &iter->second;
639 }
640
641 /// Returns a pointer to FunctionSamples at the given callsite location
642 /// \p Loc with callee \p CalleeName. If no callsite can be found, relax
643 /// the restriction to return the FunctionSamples at callsite location
644 /// \p Loc with the maximum total sample count. If \p Remapper is not
645 /// nullptr, use \p Remapper to find FunctionSamples with equivalent name
646 /// as \p CalleeName.
647 const FunctionSamples *
648 findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName,
649 SampleProfileReaderItaniumRemapper *Remapper) const;
650
651 bool empty() const { return TotalSamples == 0; }
652
653 /// Return the total number of samples collected inside the function.
654 uint64_t getTotalSamples() const { return TotalSamples; }
655
656 /// Return the total number of branch samples that have the function as the
657 /// branch target. This should be equivalent to the sample of the first
658 /// instruction of the symbol. But as we directly get this info for raw
659 /// profile without referring to potentially inaccurate debug info, this
660 /// gives more accurate profile data and is preferred for standalone symbols.
661 uint64_t getHeadSamples() const { return TotalHeadSamples; }
662
663 /// Return the sample count of the first instruction of the function.
664 /// The function can be either a standalone symbol or an inlined function.
665 uint64_t getEntrySamples() const {
666 if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
667 // For CS profile, if we already have more accurate head samples
668 // counted by branch sample from caller, use them as entry samples.
669 return getHeadSamples();
670 }
671 uint64_t Count = 0;
672 // Use either BodySamples or CallsiteSamples which ever has the smaller
673 // lineno.
674 if (!BodySamples.empty() &&
675 (CallsiteSamples.empty() ||
676 BodySamples.begin()->first < CallsiteSamples.begin()->first))
677 Count = BodySamples.begin()->second.getSamples();
678 else if (!CallsiteSamples.empty()) {
679 // An indirect callsite may be promoted to several inlined direct calls.
680 // We need to get the sum of them.
681 for (const auto &N_FS : CallsiteSamples.begin()->second)
682 Count += N_FS.second.getEntrySamples();
683 }
684 // Return at least 1 if total sample is not 0.
685 return Count ? Count : TotalSamples > 0;
686 }
687
688 /// Return all the samples collected in the body of the function.
689 const BodySampleMap &getBodySamples() const { return BodySamples; }
690
691 /// Return all the callsite samples collected in the body of the function.
692 const CallsiteSampleMap &getCallsiteSamples() const {
693 return CallsiteSamples;
694 }
695
696 /// Return the maximum of sample counts in a function body including functions
697 /// inlined in it.
698 uint64_t getMaxCountInside() const {
699 uint64_t MaxCount = 0;
700 for (const auto &L : getBodySamples())
701 MaxCount = std::max(MaxCount, L.second.getSamples());
702 for (const auto &C : getCallsiteSamples())
703 for (const FunctionSamplesMap::value_type &F : C.second)
704 MaxCount = std::max(MaxCount, F.second.getMaxCountInside());
705 return MaxCount;
706 }
707
708 /// Merge the samples in \p Other into this one.
709 /// Optionally scale samples by \p Weight.
710 sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
711 sampleprof_error Result = sampleprof_error::success;
712 Name = Other.getName();
713 if (!GUIDToFuncNameMap)
714 GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
715 if (Context.getNameWithContext().empty())
716 Context = Other.getContext();
717 if (FunctionHash == 0) {
718 // Set the function hash code for the target profile.
719 FunctionHash = Other.getFunctionHash();
720 } else if (FunctionHash != Other.getFunctionHash()) {
721 // The two profiles coming with different valid hash codes indicates
722 // either:
723 // 1. They are same-named static functions from different compilation
724 // units (without using -unique-internal-linkage-names), or
725 // 2. They are really the same function but from different compilations.
726 // Let's bail out in either case for now, which means one profile is
727 // dropped.
728 return sampleprof_error::hash_mismatch;
729 }
730
731 MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
732 MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
733 for (const auto &I : Other.getBodySamples()) {
734 const LineLocation &Loc = I.first;
735 const SampleRecord &Rec = I.second;
736 MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
737 }
738 for (const auto &I : Other.getCallsiteSamples()) {
739 const LineLocation &Loc = I.first;
740 FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
741 for (const auto &Rec : I.second)
742 MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight));
743 }
744 return Result;
745 }
746
747 /// Recursively traverses all children, if the total sample count of the
748 /// corresponding function is no less than \p Threshold, add its corresponding
749 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
750 /// to \p S.
751 void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S,
752 const StringMap<Function *> &SymbolMap,
753 uint64_t Threshold) const {
754 if (TotalSamples <= Threshold)
755 return;
756 auto isDeclaration = [](const Function *F) {
757 return !F || F->isDeclaration();
758 };
759 if (isDeclaration(SymbolMap.lookup(getFuncName()))) {
760 // Add to the import list only when it's defined out of module.
761 S.insert(getGUID(Name));
762 }
763 // Import hot CallTargets, which may not be available in IR because full
764 // profile annotation cannot be done until backend compilation in ThinLTO.
765 for (const auto &BS : BodySamples)
766 for (const auto &TS : BS.second.getCallTargets())
767 if (TS.getValue() > Threshold) {
768 const Function *Callee = SymbolMap.lookup(getFuncName(TS.getKey()));
769 if (isDeclaration(Callee))
770 S.insert(getGUID(TS.getKey()));
771 }
772 for (const auto &CS : CallsiteSamples)
773 for (const auto &NameFS : CS.second)
774 NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold);
775 }
776
777 /// Set the name of the function.
778 void setName(StringRef FunctionName) { Name = FunctionName; }
779
780 /// Return the function name.
781 StringRef getName() const { return Name; }
782
783 /// Return function name with context.
784 StringRef getNameWithContext() const {
785 return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
786 }
787
788 /// Return the original function name.
789 StringRef getFuncName() const { return getFuncName(Name); }
790
791 void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
792
793 uint64_t getFunctionHash() const { return FunctionHash; }
794
795 /// Return the canonical name for a function, taking into account
796 /// suffix elision policy attributes.
797 static StringRef getCanonicalFnName(const Function &F) {
798 auto AttrName = "sample-profile-suffix-elision-policy";
799 auto Attr = F.getFnAttribute(AttrName).getValueAsString();
800 return getCanonicalFnName(F.getName(), Attr);
801 }
802
803 /// Name suffixes which canonicalization should handle to avoid
804 /// profile mismatch.
805 static constexpr const char *LLVMSuffix = ".llvm.";
806 static constexpr const char *PartSuffix = ".part.";
807 static constexpr const char *UniqSuffix = ".__uniq.";
808
809 static StringRef getCanonicalFnName(StringRef FnName,
810 StringRef Attr = "selected") {
811 // Note the sequence of the suffixes in the knownSuffixes array matters.
812 // If suffix "A" is appended after the suffix "B", "A" should be in front
813 // of "B" in knownSuffixes.
814 const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
815 if (Attr == "" || Attr == "all") {
816 return FnName.split('.').first;
817 } else if (Attr == "selected") {
818 StringRef Cand(FnName);
819 for (const auto &Suf : knownSuffixes) {
820 StringRef Suffix(Suf);
821 // If the profile contains ".__uniq." suffix, don't strip the
822 // suffix for names in the IR.
823 if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
824 continue;
825 auto It = Cand.rfind(Suffix);
826 if (It == StringRef::npos)
827 continue;
828 auto Dit = Cand.rfind('.');
829 if (Dit == It + Suffix.size() - 1)
830 Cand = Cand.substr(0, It);
831 }
832 return Cand;
833 } else if (Attr == "none") {
834 return FnName;
835 } else {
836 assert(false && "internal error: unknown suffix elision policy");
837 }
838 return FnName;
839 }
840
841 /// Translate \p Name into its original name.
842 /// When profile doesn't use MD5, \p Name needs no translation.
843 /// When profile uses MD5, \p Name in current FunctionSamples
844 /// is actually GUID of the original function name. getFuncName will
845 /// translate \p Name in current FunctionSamples into its original name
846 /// by looking up in the function map GUIDToFuncNameMap.
847 /// If the original name doesn't exist in the map, return empty StringRef.
848 StringRef getFuncName(StringRef Name) const {
849 if (!UseMD5)
850 return Name;
851
852 assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
853 return GUIDToFuncNameMap->lookup(std::stoull(Name.data()));
854 }
855
856 /// Returns the line offset to the start line of the subprogram.
857 /// We assume that a single function will not exceed 65535 LOC.
858 static unsigned getOffset(const DILocation *DIL);
859
860 /// Returns a unique call site identifier for a given debug location of a call
861 /// instruction. This is wrapper of two scenarios, the probe-based profile and
862 /// regular profile, to hide implementation details from the sample loader and
863 /// the context tracker.
864 static LineLocation getCallSiteIdentifier(const DILocation *DIL);
865
866 /// Get the FunctionSamples of the inline instance where DIL originates
867 /// from.
868 ///
869 /// The FunctionSamples of the instruction (Machine or IR) associated to
870 /// \p DIL is the inlined instance in which that instruction is coming from.
871 /// We traverse the inline stack of that instruction, and match it with the
872 /// tree nodes in the profile.
873 ///
874 /// \returns the FunctionSamples pointer to the inlined instance.
875 /// If \p Remapper is not nullptr, it will be used to find matching
876 /// FunctionSamples with not exactly the same but equivalent name.
877 const FunctionSamples *findFunctionSamples(
878 const DILocation *DIL,
879 SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
880
881 static bool ProfileIsProbeBased;
882
883 static bool ProfileIsCS;
884
885 SampleContext &getContext() const { return Context; }
886
887 void setContext(const SampleContext &FContext) { Context = FContext; }
888
889 static SampleProfileFormat Format;
890
891 /// Whether the profile uses MD5 to represent string.
892 static bool UseMD5;
893
894 /// Whether the profile contains any ".__uniq." suffix in a name.
895 static bool HasUniqSuffix;
896
897 /// If this profile uses flow sensitive discriminators.
898 static bool ProfileIsFS;
899
900 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
901 /// all the function symbols defined or declared in current module.
902 DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr;
903
904 // Assume the input \p Name is a name coming from FunctionSamples itself.
905 // If UseMD5 is true, the name is already a GUID and we
906 // don't want to return the GUID of GUID.
907 static uint64_t getGUID(StringRef Name) {
908 return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name);
909 }
910
911 // Find all the names in the current FunctionSamples including names in
912 // all the inline instances and names of call targets.
913 void findAllNames(DenseSet<StringRef> &NameSet) const;
914
915 private:
916 /// Mangled name of the function.
917 StringRef Name;
918
919 /// CFG hash value for the function.
920 uint64_t FunctionHash = 0;
921
922 /// Calling context for function profile
923 mutable SampleContext Context;
924
925 /// Total number of samples collected inside this function.
926 ///
927 /// Samples are cumulative, they include all the samples collected
928 /// inside this function and all its inlined callees.
929 uint64_t TotalSamples = 0;
930
931 /// Total number of samples collected at the head of the function.
932 /// This is an approximation of the number of calls made to this function
933 /// at runtime.
934 uint64_t TotalHeadSamples = 0;
935
936 /// Map instruction locations to collected samples.
937 ///
938 /// Each entry in this map contains the number of samples
939 /// collected at the corresponding line offset. All line locations
940 /// are an offset from the start of the function.
941 BodySampleMap BodySamples;
942
943 /// Map call sites to collected samples for the called function.
944 ///
945 /// Each entry in this map corresponds to all the samples
946 /// collected for the inlined function call at the given
947 /// location. For example, given:
948 ///
949 /// void foo() {
950 /// 1 bar();
951 /// ...
952 /// 8 baz();
953 /// }
954 ///
955 /// If the bar() and baz() calls were inlined inside foo(), this
956 /// map will contain two entries. One for all the samples collected
957 /// in the call to bar() at line offset 1, the other for all the samples
958 /// collected in the call to baz() at line offset 8.
959 CallsiteSampleMap CallsiteSamples;
960 };
961
962 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
963
964 /// Sort a LocationT->SampleT map by LocationT.
965 ///
966 /// It produces a sorted list of <LocationT, SampleT> records by ascending
967 /// order of LocationT.
968 template <class LocationT, class SampleT> class SampleSorter {
969 public:
970 using SamplesWithLoc = std::pair<const LocationT, SampleT>;
971 using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>;
972
973 SampleSorter(const std::map<LocationT, SampleT> &Samples) {
974 for (const auto &I : Samples)
975 V.push_back(&I);
976 llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
977 return A->first < B->first;
978 });
979 }
980
981 const SamplesWithLocList &get() const { return V; }
982
983 private:
984 SamplesWithLocList V;
985 };
986
987 /// SampleContextTrimmer impelements helper functions to trim, merge cold
988 /// context profiles. It also supports context profile canonicalization to make
989 /// sure ProfileMap's key is consistent with FunctionSample's name/context.
990 class SampleContextTrimmer {
991 public:
992 SampleContextTrimmer(StringMap<FunctionSamples> &Profiles)
993 : ProfileMap(Profiles){};
994 // Trim and merge cold context profile when requested.
995 void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
996 bool TrimColdContext,
997 bool MergeColdContext,
998 uint32_t ColdContextFrameLength);
999 // Canonicalize context profile name and attributes.
1000 void canonicalizeContextProfiles();
1001
1002 private:
1003 StringMap<FunctionSamples> &ProfileMap;
1004 };
1005
1006 /// ProfileSymbolList records the list of function symbols shown up
1007 /// in the binary used to generate the profile. It is useful to
1008 /// to discriminate a function being so cold as not to shown up
1009 /// in the profile and a function newly added.
1010 class ProfileSymbolList {
1011 public:
1012 /// copy indicates whether we need to copy the underlying memory
1013 /// for the input Name.
1014 void add(StringRef Name, bool copy = false) {
1015 if (!copy) {
1016 Syms.insert(Name);
1017 return;
1018 }
1019 Syms.insert(Name.copy(Allocator));
1020 }
1021
1022 bool contains(StringRef Name) { return Syms.count(Name); }
1023
1024 void merge(const ProfileSymbolList &List) {
1025 for (auto Sym : List.Syms)
1026 add(Sym, true);
1027 }
1028
1029 unsigned size() { return Syms.size(); }
1030
1031 void setToCompress(bool TC) { ToCompress = TC; }
1032 bool toCompress() { return ToCompress; }
1033
1034 std::error_code read(const uint8_t *Data, uint64_t ListSize);
1035 std::error_code write(raw_ostream &OS);
1036 void dump(raw_ostream &OS = dbgs()) const;
1037
1038 private:
1039 // Determine whether or not to compress the symbol list when
1040 // writing it into profile. The variable is unused when the symbol
1041 // list is read from an existing profile.
1042 bool ToCompress = false;
1043 DenseSet<StringRef> Syms;
1044 BumpPtrAllocator Allocator;
1045 };
1046
1047 } // end namespace sampleprof
1048 } // end namespace llvm
1049
1050 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H
1051