1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains common definitions used in the reading and writing of 10 // sample profile data. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H 15 #define LLVM_PROFILEDATA_SAMPLEPROF_H 16 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/IR/Function.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/ProfileData/FunctionId.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/ErrorOr.h" 27 #include "llvm/Support/MathExtras.h" 28 #include "llvm/ProfileData/HashKeyMap.h" 29 #include <algorithm> 30 #include <cstdint> 31 #include <list> 32 #include <map> 33 #include <set> 34 #include <sstream> 35 #include <string> 36 #include <system_error> 37 #include <unordered_map> 38 #include <utility> 39 40 namespace llvm { 41 42 class DILocation; 43 class raw_ostream; 44 45 const std::error_category &sampleprof_category(); 46 47 enum class sampleprof_error { 48 success = 0, 49 bad_magic, 50 unsupported_version, 51 too_large, 52 truncated, 53 malformed, 54 unrecognized_format, 55 unsupported_writing_format, 56 truncated_name_table, 57 not_implemented, 58 counter_overflow, 59 ostream_seek_unsupported, 60 uncompress_failed, 61 zlib_unavailable, 62 hash_mismatch 63 }; 64 65 inline std::error_code make_error_code(sampleprof_error E) { 66 return std::error_code(static_cast<int>(E), sampleprof_category()); 67 } 68 69 inline sampleprof_error MergeResult(sampleprof_error &Accumulator, 70 sampleprof_error Result) { 71 // Prefer first error encountered as later errors may be secondary effects of 72 // the initial problem. 73 if (Accumulator == sampleprof_error::success && 74 Result != sampleprof_error::success) 75 Accumulator = Result; 76 return Accumulator; 77 } 78 79 } // end namespace llvm 80 81 namespace std { 82 83 template <> 84 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {}; 85 86 } // end namespace std 87 88 namespace llvm { 89 namespace sampleprof { 90 91 enum SampleProfileFormat { 92 SPF_None = 0, 93 SPF_Text = 0x1, 94 SPF_Compact_Binary = 0x2, // Deprecated 95 SPF_GCC = 0x3, 96 SPF_Ext_Binary = 0x4, 97 SPF_Binary = 0xff 98 }; 99 100 enum SampleProfileLayout { 101 SPL_None = 0, 102 SPL_Nest = 0x1, 103 SPL_Flat = 0x2, 104 }; 105 106 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { 107 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | 108 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | 109 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) | 110 uint64_t('2') << (64 - 56) | uint64_t(Format); 111 } 112 113 static inline uint64_t SPVersion() { return 103; } 114 115 // Section Type used by SampleProfileExtBinaryBaseReader and 116 // SampleProfileExtBinaryBaseWriter. Never change the existing 117 // value of enum. Only append new ones. 118 enum SecType { 119 SecInValid = 0, 120 SecProfSummary = 1, 121 SecNameTable = 2, 122 SecProfileSymbolList = 3, 123 SecFuncOffsetTable = 4, 124 SecFuncMetadata = 5, 125 SecCSNameTable = 6, 126 // marker for the first type of profile. 127 SecFuncProfileFirst = 32, 128 SecLBRProfile = SecFuncProfileFirst 129 }; 130 131 static inline std::string getSecName(SecType Type) { 132 switch ((int)Type) { // Avoid -Wcovered-switch-default 133 case SecInValid: 134 return "InvalidSection"; 135 case SecProfSummary: 136 return "ProfileSummarySection"; 137 case SecNameTable: 138 return "NameTableSection"; 139 case SecProfileSymbolList: 140 return "ProfileSymbolListSection"; 141 case SecFuncOffsetTable: 142 return "FuncOffsetTableSection"; 143 case SecFuncMetadata: 144 return "FunctionMetadata"; 145 case SecCSNameTable: 146 return "CSNameTableSection"; 147 case SecLBRProfile: 148 return "LBRProfileSection"; 149 default: 150 return "UnknownSection"; 151 } 152 } 153 154 // Entry type of section header table used by SampleProfileExtBinaryBaseReader 155 // and SampleProfileExtBinaryBaseWriter. 156 struct SecHdrTableEntry { 157 SecType Type; 158 uint64_t Flags; 159 uint64_t Offset; 160 uint64_t Size; 161 // The index indicating the location of the current entry in 162 // SectionHdrLayout table. 163 uint64_t LayoutIndex; 164 }; 165 166 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags, 167 // common flags will be saved in the lower 32bits and section specific flags 168 // will be saved in the higher 32 bits. 169 enum class SecCommonFlags : uint32_t { 170 SecFlagInValid = 0, 171 SecFlagCompress = (1 << 0), 172 // Indicate the section contains only profile without context. 173 SecFlagFlat = (1 << 1) 174 }; 175 176 // Section specific flags are defined here. 177 // !!!Note: Everytime a new enum class is created here, please add 178 // a new check in verifySecFlag. 179 enum class SecNameTableFlags : uint32_t { 180 SecFlagInValid = 0, 181 SecFlagMD5Name = (1 << 0), 182 // Store MD5 in fixed length instead of ULEB128 so NameTable can be 183 // accessed like an array. 184 SecFlagFixedLengthMD5 = (1 << 1), 185 // Profile contains ".__uniq." suffix name. Compiler shouldn't strip 186 // the suffix when doing profile matching when seeing the flag. 187 SecFlagUniqSuffix = (1 << 2) 188 }; 189 enum class SecProfSummaryFlags : uint32_t { 190 SecFlagInValid = 0, 191 /// SecFlagPartial means the profile is for common/shared code. 192 /// The common profile is usually merged from profiles collected 193 /// from running other targets. 194 SecFlagPartial = (1 << 0), 195 /// SecFlagContext means this is context-sensitive flat profile for 196 /// CSSPGO 197 SecFlagFullContext = (1 << 1), 198 /// SecFlagFSDiscriminator means this profile uses flow-sensitive 199 /// discriminators. 200 SecFlagFSDiscriminator = (1 << 2), 201 /// SecFlagIsPreInlined means this profile contains ShouldBeInlined 202 /// contexts thus this is CS preinliner computed. 203 SecFlagIsPreInlined = (1 << 4), 204 }; 205 206 enum class SecFuncMetadataFlags : uint32_t { 207 SecFlagInvalid = 0, 208 SecFlagIsProbeBased = (1 << 0), 209 SecFlagHasAttribute = (1 << 1), 210 }; 211 212 enum class SecFuncOffsetFlags : uint32_t { 213 SecFlagInvalid = 0, 214 // Store function offsets in an order of contexts. The order ensures that 215 // callee contexts of a given context laid out next to it. 216 SecFlagOrdered = (1 << 0), 217 }; 218 219 // Verify section specific flag is used for the correct section. 220 template <class SecFlagType> 221 static inline void verifySecFlag(SecType Type, SecFlagType Flag) { 222 // No verification is needed for common flags. 223 if (std::is_same<SecCommonFlags, SecFlagType>()) 224 return; 225 226 // Verification starts here for section specific flag. 227 bool IsFlagLegal = false; 228 switch (Type) { 229 case SecNameTable: 230 IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>(); 231 break; 232 case SecProfSummary: 233 IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>(); 234 break; 235 case SecFuncMetadata: 236 IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); 237 break; 238 default: 239 case SecFuncOffsetTable: 240 IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>(); 241 break; 242 } 243 if (!IsFlagLegal) 244 llvm_unreachable("Misuse of a flag in an incompatible section"); 245 } 246 247 template <class SecFlagType> 248 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { 249 verifySecFlag(Entry.Type, Flag); 250 auto FVal = static_cast<uint64_t>(Flag); 251 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 252 Entry.Flags |= IsCommon ? FVal : (FVal << 32); 253 } 254 255 template <class SecFlagType> 256 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { 257 verifySecFlag(Entry.Type, Flag); 258 auto FVal = static_cast<uint64_t>(Flag); 259 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 260 Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32)); 261 } 262 263 template <class SecFlagType> 264 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) { 265 verifySecFlag(Entry.Type, Flag); 266 auto FVal = static_cast<uint64_t>(Flag); 267 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 268 return Entry.Flags & (IsCommon ? FVal : (FVal << 32)); 269 } 270 271 /// Represents the relative location of an instruction. 272 /// 273 /// Instruction locations are specified by the line offset from the 274 /// beginning of the function (marked by the line where the function 275 /// header is) and the discriminator value within that line. 276 /// 277 /// The discriminator value is useful to distinguish instructions 278 /// that are on the same line but belong to different basic blocks 279 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). 280 struct LineLocation { 281 LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} 282 283 void print(raw_ostream &OS) const; 284 void dump() const; 285 286 bool operator<(const LineLocation &O) const { 287 return LineOffset < O.LineOffset || 288 (LineOffset == O.LineOffset && Discriminator < O.Discriminator); 289 } 290 291 bool operator==(const LineLocation &O) const { 292 return LineOffset == O.LineOffset && Discriminator == O.Discriminator; 293 } 294 295 bool operator!=(const LineLocation &O) const { 296 return LineOffset != O.LineOffset || Discriminator != O.Discriminator; 297 } 298 299 uint64_t getHashCode() const { 300 return ((uint64_t) Discriminator << 32) | LineOffset; 301 } 302 303 uint32_t LineOffset; 304 uint32_t Discriminator; 305 }; 306 307 struct LineLocationHash { 308 uint64_t operator()(const LineLocation &Loc) const { 309 return Loc.getHashCode(); 310 } 311 }; 312 313 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); 314 315 /// Representation of a single sample record. 316 /// 317 /// A sample record is represented by a positive integer value, which 318 /// indicates how frequently was the associated line location executed. 319 /// 320 /// Additionally, if the associated location contains a function call, 321 /// the record will hold a list of all the possible called targets. For 322 /// direct calls, this will be the exact function being invoked. For 323 /// indirect calls (function pointers, virtual table dispatch), this 324 /// will be a list of one or more functions. 325 class SampleRecord { 326 public: 327 using CallTarget = std::pair<FunctionId, uint64_t>; 328 struct CallTargetComparator { 329 bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { 330 if (LHS.second != RHS.second) 331 return LHS.second > RHS.second; 332 333 return LHS.first < RHS.first; 334 } 335 }; 336 337 using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>; 338 using CallTargetMap = std::unordered_map<FunctionId, uint64_t>; 339 SampleRecord() = default; 340 341 /// Increment the number of samples for this record by \p S. 342 /// Optionally scale sample count \p S by \p Weight. 343 /// 344 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 345 /// around unsigned integers. 346 sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { 347 bool Overflowed; 348 NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); 349 return Overflowed ? sampleprof_error::counter_overflow 350 : sampleprof_error::success; 351 } 352 353 /// Decrease the number of samples for this record by \p S. Return the amout 354 /// of samples actually decreased. 355 uint64_t removeSamples(uint64_t S) { 356 if (S > NumSamples) 357 S = NumSamples; 358 NumSamples -= S; 359 return S; 360 } 361 362 /// Add called function \p F with samples \p S. 363 /// Optionally scale sample count \p S by \p Weight. 364 /// 365 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 366 /// around unsigned integers. 367 sampleprof_error addCalledTarget(FunctionId F, uint64_t S, 368 uint64_t Weight = 1) { 369 uint64_t &TargetSamples = CallTargets[F]; 370 bool Overflowed; 371 TargetSamples = 372 SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); 373 return Overflowed ? sampleprof_error::counter_overflow 374 : sampleprof_error::success; 375 } 376 377 /// Remove called function from the call target map. Return the target sample 378 /// count of the called function. 379 uint64_t removeCalledTarget(FunctionId F) { 380 uint64_t Count = 0; 381 auto I = CallTargets.find(F); 382 if (I != CallTargets.end()) { 383 Count = I->second; 384 CallTargets.erase(I); 385 } 386 return Count; 387 } 388 389 /// Return true if this sample record contains function calls. 390 bool hasCalls() const { return !CallTargets.empty(); } 391 392 uint64_t getSamples() const { return NumSamples; } 393 const CallTargetMap &getCallTargets() const { return CallTargets; } 394 const SortedCallTargetSet getSortedCallTargets() const { 395 return SortCallTargets(CallTargets); 396 } 397 398 uint64_t getCallTargetSum() const { 399 uint64_t Sum = 0; 400 for (const auto &I : CallTargets) 401 Sum += I.second; 402 return Sum; 403 } 404 405 /// Sort call targets in descending order of call frequency. 406 static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { 407 SortedCallTargetSet SortedTargets; 408 for (const auto &[Target, Frequency] : Targets) { 409 SortedTargets.emplace(Target, Frequency); 410 } 411 return SortedTargets; 412 } 413 414 /// Prorate call targets by a distribution factor. 415 static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, 416 float DistributionFactor) { 417 CallTargetMap AdjustedTargets; 418 for (const auto &[Target, Frequency] : Targets) { 419 AdjustedTargets[Target] = Frequency * DistributionFactor; 420 } 421 return AdjustedTargets; 422 } 423 424 /// Merge the samples in \p Other into this record. 425 /// Optionally scale sample counts by \p Weight. 426 sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1); 427 void print(raw_ostream &OS, unsigned Indent) const; 428 void dump() const; 429 430 bool operator==(const SampleRecord &Other) const { 431 return NumSamples == Other.NumSamples && CallTargets == Other.CallTargets; 432 } 433 434 bool operator!=(const SampleRecord &Other) const { 435 return !(*this == Other); 436 } 437 438 private: 439 uint64_t NumSamples = 0; 440 CallTargetMap CallTargets; 441 }; 442 443 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); 444 445 // State of context associated with FunctionSamples 446 enum ContextStateMask { 447 UnknownContext = 0x0, // Profile without context 448 RawContext = 0x1, // Full context profile from input profile 449 SyntheticContext = 0x2, // Synthetic context created for context promotion 450 InlinedContext = 0x4, // Profile for context that is inlined into caller 451 MergedContext = 0x8 // Profile for context merged into base profile 452 }; 453 454 // Attribute of context associated with FunctionSamples 455 enum ContextAttributeMask { 456 ContextNone = 0x0, 457 ContextWasInlined = 0x1, // Leaf of context was inlined in previous build 458 ContextShouldBeInlined = 0x2, // Leaf of context should be inlined 459 ContextDuplicatedIntoBase = 460 0x4, // Leaf of context is duplicated into the base profile 461 }; 462 463 // Represents a context frame with profile function and line location 464 struct SampleContextFrame { 465 FunctionId Func; 466 LineLocation Location; 467 468 SampleContextFrame() : Location(0, 0) {} 469 470 SampleContextFrame(FunctionId Func, LineLocation Location) 471 : Func(Func), Location(Location) {} 472 473 bool operator==(const SampleContextFrame &That) const { 474 return Location == That.Location && Func == That.Func; 475 } 476 477 bool operator!=(const SampleContextFrame &That) const { 478 return !(*this == That); 479 } 480 481 std::string toString(bool OutputLineLocation) const { 482 std::ostringstream OContextStr; 483 OContextStr << Func.str(); 484 if (OutputLineLocation) { 485 OContextStr << ":" << Location.LineOffset; 486 if (Location.Discriminator) 487 OContextStr << "." << Location.Discriminator; 488 } 489 return OContextStr.str(); 490 } 491 492 uint64_t getHashCode() const { 493 uint64_t NameHash = Func.getHashCode(); 494 uint64_t LocId = Location.getHashCode(); 495 return NameHash + (LocId << 5) + LocId; 496 } 497 }; 498 499 static inline hash_code hash_value(const SampleContextFrame &arg) { 500 return arg.getHashCode(); 501 } 502 503 using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>; 504 using SampleContextFrames = ArrayRef<SampleContextFrame>; 505 506 struct SampleContextFrameHash { 507 uint64_t operator()(const SampleContextFrameVector &S) const { 508 return hash_combine_range(S.begin(), S.end()); 509 } 510 }; 511 512 // Sample context for FunctionSamples. It consists of the calling context, 513 // the function name and context state. Internally sample context is represented 514 // using ArrayRef, which is also the input for constructing a `SampleContext`. 515 // It can accept and represent both full context string as well as context-less 516 // function name. 517 // For a CS profile, a full context vector can look like: 518 // `main:3 _Z5funcAi:1 _Z8funcLeafi` 519 // For a base CS profile without calling context, the context vector should only 520 // contain the leaf frame name. 521 // For a non-CS profile, the context vector should be empty. 522 class SampleContext { 523 public: 524 SampleContext() : State(UnknownContext), Attributes(ContextNone) {} 525 526 SampleContext(StringRef Name) 527 : Func(Name), State(UnknownContext), Attributes(ContextNone) { 528 assert(!Name.empty() && "Name is empty"); 529 } 530 531 SampleContext(FunctionId Func) 532 : Func(Func), State(UnknownContext), Attributes(ContextNone) {} 533 534 SampleContext(SampleContextFrames Context, 535 ContextStateMask CState = RawContext) 536 : Attributes(ContextNone) { 537 assert(!Context.empty() && "Context is empty"); 538 setContext(Context, CState); 539 } 540 541 // Give a context string, decode and populate internal states like 542 // Function name, Calling context and context state. Example of input 543 // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` 544 SampleContext(StringRef ContextStr, 545 std::list<SampleContextFrameVector> &CSNameTable, 546 ContextStateMask CState = RawContext) 547 : Attributes(ContextNone) { 548 assert(!ContextStr.empty()); 549 // Note that `[]` wrapped input indicates a full context string, otherwise 550 // it's treated as context-less function name only. 551 bool HasContext = ContextStr.starts_with("["); 552 if (!HasContext) { 553 State = UnknownContext; 554 Func = FunctionId(ContextStr); 555 } else { 556 CSNameTable.emplace_back(); 557 SampleContextFrameVector &Context = CSNameTable.back(); 558 createCtxVectorFromStr(ContextStr, Context); 559 setContext(Context, CState); 560 } 561 } 562 563 /// Create a context vector from a given context string and save it in 564 /// `Context`. 565 static void createCtxVectorFromStr(StringRef ContextStr, 566 SampleContextFrameVector &Context) { 567 // Remove encapsulating '[' and ']' if any 568 ContextStr = ContextStr.substr(1, ContextStr.size() - 2); 569 StringRef ContextRemain = ContextStr; 570 StringRef ChildContext; 571 FunctionId Callee; 572 while (!ContextRemain.empty()) { 573 auto ContextSplit = ContextRemain.split(" @ "); 574 ChildContext = ContextSplit.first; 575 ContextRemain = ContextSplit.second; 576 LineLocation CallSiteLoc(0, 0); 577 decodeContextString(ChildContext, Callee, CallSiteLoc); 578 Context.emplace_back(Callee, CallSiteLoc); 579 } 580 } 581 582 // Decode context string for a frame to get function name and location. 583 // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. 584 static void decodeContextString(StringRef ContextStr, 585 FunctionId &Func, 586 LineLocation &LineLoc) { 587 // Get function name 588 auto EntrySplit = ContextStr.split(':'); 589 Func = FunctionId(EntrySplit.first); 590 591 LineLoc = {0, 0}; 592 if (!EntrySplit.second.empty()) { 593 // Get line offset, use signed int for getAsInteger so string will 594 // be parsed as signed. 595 int LineOffset = 0; 596 auto LocSplit = EntrySplit.second.split('.'); 597 LocSplit.first.getAsInteger(10, LineOffset); 598 LineLoc.LineOffset = LineOffset; 599 600 // Get discriminator 601 if (!LocSplit.second.empty()) 602 LocSplit.second.getAsInteger(10, LineLoc.Discriminator); 603 } 604 } 605 606 operator SampleContextFrames() const { return FullContext; } 607 bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } 608 void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } 609 uint32_t getAllAttributes() { return Attributes; } 610 void setAllAttributes(uint32_t A) { Attributes = A; } 611 bool hasState(ContextStateMask S) { return State & (uint32_t)S; } 612 void setState(ContextStateMask S) { State |= (uint32_t)S; } 613 void clearState(ContextStateMask S) { State &= (uint32_t)~S; } 614 bool hasContext() const { return State != UnknownContext; } 615 bool isBaseContext() const { return FullContext.size() == 1; } 616 FunctionId getFunction() const { return Func; } 617 SampleContextFrames getContextFrames() const { return FullContext; } 618 619 static std::string getContextString(SampleContextFrames Context, 620 bool IncludeLeafLineLocation = false) { 621 std::ostringstream OContextStr; 622 for (uint32_t I = 0; I < Context.size(); I++) { 623 if (OContextStr.str().size()) { 624 OContextStr << " @ "; 625 } 626 OContextStr << Context[I].toString(I != Context.size() - 1 || 627 IncludeLeafLineLocation); 628 } 629 return OContextStr.str(); 630 } 631 632 std::string toString() const { 633 if (!hasContext()) 634 return Func.str(); 635 return getContextString(FullContext, false); 636 } 637 638 uint64_t getHashCode() const { 639 if (hasContext()) 640 return hash_value(getContextFrames()); 641 return getFunction().getHashCode(); 642 } 643 644 /// Set the name of the function and clear the current context. 645 void setFunction(FunctionId newFunction) { 646 Func = newFunction; 647 FullContext = SampleContextFrames(); 648 State = UnknownContext; 649 } 650 651 void setContext(SampleContextFrames Context, 652 ContextStateMask CState = RawContext) { 653 assert(CState != UnknownContext); 654 FullContext = Context; 655 Func = Context.back().Func; 656 State = CState; 657 } 658 659 bool operator==(const SampleContext &That) const { 660 return State == That.State && Func == That.Func && 661 FullContext == That.FullContext; 662 } 663 664 bool operator!=(const SampleContext &That) const { return !(*this == That); } 665 666 bool operator<(const SampleContext &That) const { 667 if (State != That.State) 668 return State < That.State; 669 670 if (!hasContext()) { 671 return Func < That.Func; 672 } 673 674 uint64_t I = 0; 675 while (I < std::min(FullContext.size(), That.FullContext.size())) { 676 auto &Context1 = FullContext[I]; 677 auto &Context2 = That.FullContext[I]; 678 auto V = Context1.Func.compare(Context2.Func); 679 if (V) 680 return V < 0; 681 if (Context1.Location != Context2.Location) 682 return Context1.Location < Context2.Location; 683 I++; 684 } 685 686 return FullContext.size() < That.FullContext.size(); 687 } 688 689 struct Hash { 690 uint64_t operator()(const SampleContext &Context) const { 691 return Context.getHashCode(); 692 } 693 }; 694 695 bool IsPrefixOf(const SampleContext &That) const { 696 auto ThisContext = FullContext; 697 auto ThatContext = That.FullContext; 698 if (ThatContext.size() < ThisContext.size()) 699 return false; 700 ThatContext = ThatContext.take_front(ThisContext.size()); 701 // Compare Leaf frame first 702 if (ThisContext.back().Func != ThatContext.back().Func) 703 return false; 704 // Compare leading context 705 return ThisContext.drop_back() == ThatContext.drop_back(); 706 } 707 708 private: 709 // The function associated with this context. If CS profile, this is the leaf 710 // function. 711 FunctionId Func; 712 // Full context including calling context and leaf function name 713 SampleContextFrames FullContext; 714 // State of the associated sample profile 715 uint32_t State; 716 // Attribute of the associated sample profile 717 uint32_t Attributes; 718 }; 719 720 static inline hash_code hash_value(const SampleContext &Context) { 721 return Context.getHashCode(); 722 } 723 724 inline raw_ostream &operator<<(raw_ostream &OS, const SampleContext &Context) { 725 return OS << Context.toString(); 726 } 727 728 class FunctionSamples; 729 class SampleProfileReaderItaniumRemapper; 730 731 using BodySampleMap = std::map<LineLocation, SampleRecord>; 732 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more 733 // memory, which is *very* significant for large profiles. 734 using FunctionSamplesMap = std::map<FunctionId, FunctionSamples>; 735 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; 736 using LocToLocMap = 737 std::unordered_map<LineLocation, LineLocation, LineLocationHash>; 738 739 /// Representation of the samples collected for a function. 740 /// 741 /// This data structure contains all the collected samples for the body 742 /// of a function. Each sample corresponds to a LineLocation instance 743 /// within the body of the function. 744 class FunctionSamples { 745 public: 746 FunctionSamples() = default; 747 748 void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; 749 void dump() const; 750 751 sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { 752 bool Overflowed; 753 TotalSamples = 754 SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); 755 return Overflowed ? sampleprof_error::counter_overflow 756 : sampleprof_error::success; 757 } 758 759 void removeTotalSamples(uint64_t Num) { 760 if (TotalSamples < Num) 761 TotalSamples = 0; 762 else 763 TotalSamples -= Num; 764 } 765 766 void setTotalSamples(uint64_t Num) { TotalSamples = Num; } 767 768 void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; } 769 770 sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { 771 bool Overflowed; 772 TotalHeadSamples = 773 SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); 774 return Overflowed ? sampleprof_error::counter_overflow 775 : sampleprof_error::success; 776 } 777 778 sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, 779 uint64_t Num, uint64_t Weight = 1) { 780 return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( 781 Num, Weight); 782 } 783 784 sampleprof_error addCalledTargetSamples(uint32_t LineOffset, 785 uint32_t Discriminator, 786 FunctionId Func, 787 uint64_t Num, 788 uint64_t Weight = 1) { 789 return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( 790 Func, Num, Weight); 791 } 792 793 sampleprof_error addSampleRecord(LineLocation Location, 794 const SampleRecord &SampleRecord, 795 uint64_t Weight = 1) { 796 return BodySamples[Location].merge(SampleRecord, Weight); 797 } 798 799 // Remove a call target and decrease the body sample correspondingly. Return 800 // the number of body samples actually decreased. 801 uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, 802 uint32_t Discriminator, 803 FunctionId Func) { 804 uint64_t Count = 0; 805 auto I = BodySamples.find(LineLocation(LineOffset, Discriminator)); 806 if (I != BodySamples.end()) { 807 Count = I->second.removeCalledTarget(Func); 808 Count = I->second.removeSamples(Count); 809 if (!I->second.getSamples()) 810 BodySamples.erase(I); 811 } 812 return Count; 813 } 814 815 // Remove all call site samples for inlinees. This is needed when flattening 816 // a nested profile. 817 void removeAllCallsiteSamples() { 818 CallsiteSamples.clear(); 819 } 820 821 // Accumulate all call target samples to update the body samples. 822 void updateCallsiteSamples() { 823 for (auto &I : BodySamples) { 824 uint64_t TargetSamples = I.second.getCallTargetSum(); 825 // It's possible that the body sample count can be greater than the call 826 // target sum. E.g, if some call targets are external targets, they won't 827 // be considered valid call targets, but the body sample count which is 828 // from lbr ranges can actually include them. 829 if (TargetSamples > I.second.getSamples()) 830 I.second.addSamples(TargetSamples - I.second.getSamples()); 831 } 832 } 833 834 // Accumulate all body samples to set total samples. 835 void updateTotalSamples() { 836 setTotalSamples(0); 837 for (const auto &I : BodySamples) 838 addTotalSamples(I.second.getSamples()); 839 840 for (auto &I : CallsiteSamples) { 841 for (auto &CS : I.second) { 842 CS.second.updateTotalSamples(); 843 addTotalSamples(CS.second.getTotalSamples()); 844 } 845 } 846 } 847 848 // Set current context and all callee contexts to be synthetic. 849 void SetContextSynthetic() { 850 Context.setState(SyntheticContext); 851 for (auto &I : CallsiteSamples) { 852 for (auto &CS : I.second) { 853 CS.second.SetContextSynthetic(); 854 } 855 } 856 } 857 858 // Query the stale profile matching results and remap the location. 859 const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const { 860 // There is no remapping if the profile is not stale or the matching gives 861 // the same location. 862 if (!IRToProfileLocationMap) 863 return IRLoc; 864 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); 865 if (ProfileLoc != IRToProfileLocationMap->end()) 866 return ProfileLoc->second; 867 else 868 return IRLoc; 869 } 870 871 /// Return the number of samples collected at the given location. 872 /// Each location is specified by \p LineOffset and \p Discriminator. 873 /// If the location is not found in profile, return error. 874 ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, 875 uint32_t Discriminator) const { 876 const auto &ret = BodySamples.find( 877 mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); 878 if (ret == BodySamples.end()) 879 return std::error_code(); 880 return ret->second.getSamples(); 881 } 882 883 /// Returns the call target map collected at a given location. 884 /// Each location is specified by \p LineOffset and \p Discriminator. 885 /// If the location is not found in profile, return error. 886 ErrorOr<const SampleRecord::CallTargetMap &> 887 findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { 888 const auto &ret = BodySamples.find( 889 mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); 890 if (ret == BodySamples.end()) 891 return std::error_code(); 892 return ret->second.getCallTargets(); 893 } 894 895 /// Returns the call target map collected at a given location specified by \p 896 /// CallSite. If the location is not found in profile, return error. 897 ErrorOr<const SampleRecord::CallTargetMap &> 898 findCallTargetMapAt(const LineLocation &CallSite) const { 899 const auto &Ret = BodySamples.find(mapIRLocToProfileLoc(CallSite)); 900 if (Ret == BodySamples.end()) 901 return std::error_code(); 902 return Ret->second.getCallTargets(); 903 } 904 905 /// Return the function samples at the given callsite location. 906 FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { 907 return CallsiteSamples[mapIRLocToProfileLoc(Loc)]; 908 } 909 910 /// Returns the FunctionSamplesMap at the given \p Loc. 911 const FunctionSamplesMap * 912 findFunctionSamplesMapAt(const LineLocation &Loc) const { 913 auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); 914 if (iter == CallsiteSamples.end()) 915 return nullptr; 916 return &iter->second; 917 } 918 919 /// Returns a pointer to FunctionSamples at the given callsite location 920 /// \p Loc with callee \p CalleeName. If no callsite can be found, relax 921 /// the restriction to return the FunctionSamples at callsite location 922 /// \p Loc with the maximum total sample count. If \p Remapper is not 923 /// nullptr, use \p Remapper to find FunctionSamples with equivalent name 924 /// as \p CalleeName. 925 const FunctionSamples * 926 findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, 927 SampleProfileReaderItaniumRemapper *Remapper) const; 928 929 bool empty() const { return TotalSamples == 0; } 930 931 /// Return the total number of samples collected inside the function. 932 uint64_t getTotalSamples() const { return TotalSamples; } 933 934 /// For top-level functions, return the total number of branch samples that 935 /// have the function as the branch target (or 0 otherwise). This is the raw 936 /// data fetched from the profile. This should be equivalent to the sample of 937 /// the first instruction of the symbol. But as we directly get this info for 938 /// raw profile without referring to potentially inaccurate debug info, this 939 /// gives more accurate profile data and is preferred for standalone symbols. 940 uint64_t getHeadSamples() const { return TotalHeadSamples; } 941 942 /// Return an estimate of the sample count of the function entry basic block. 943 /// The function can be either a standalone symbol or an inlined function. 944 /// For Context-Sensitive profiles, this will prefer returning the head 945 /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from 946 /// the function body's samples or callsite samples. 947 uint64_t getHeadSamplesEstimate() const { 948 if (FunctionSamples::ProfileIsCS && getHeadSamples()) { 949 // For CS profile, if we already have more accurate head samples 950 // counted by branch sample from caller, use them as entry samples. 951 return getHeadSamples(); 952 } 953 uint64_t Count = 0; 954 // Use either BodySamples or CallsiteSamples which ever has the smaller 955 // lineno. 956 if (!BodySamples.empty() && 957 (CallsiteSamples.empty() || 958 BodySamples.begin()->first < CallsiteSamples.begin()->first)) 959 Count = BodySamples.begin()->second.getSamples(); 960 else if (!CallsiteSamples.empty()) { 961 // An indirect callsite may be promoted to several inlined direct calls. 962 // We need to get the sum of them. 963 for (const auto &N_FS : CallsiteSamples.begin()->second) 964 Count += N_FS.second.getHeadSamplesEstimate(); 965 } 966 // Return at least 1 if total sample is not 0. 967 return Count ? Count : TotalSamples > 0; 968 } 969 970 /// Return all the samples collected in the body of the function. 971 const BodySampleMap &getBodySamples() const { return BodySamples; } 972 973 /// Return all the callsite samples collected in the body of the function. 974 const CallsiteSampleMap &getCallsiteSamples() const { 975 return CallsiteSamples; 976 } 977 978 /// Return the maximum of sample counts in a function body. When SkipCallSite 979 /// is false, which is the default, the return count includes samples in the 980 /// inlined functions. When SkipCallSite is true, the return count only 981 /// considers the body samples. 982 uint64_t getMaxCountInside(bool SkipCallSite = false) const { 983 uint64_t MaxCount = 0; 984 for (const auto &L : getBodySamples()) 985 MaxCount = std::max(MaxCount, L.second.getSamples()); 986 if (SkipCallSite) 987 return MaxCount; 988 for (const auto &C : getCallsiteSamples()) 989 for (const FunctionSamplesMap::value_type &F : C.second) 990 MaxCount = std::max(MaxCount, F.second.getMaxCountInside()); 991 return MaxCount; 992 } 993 994 /// Merge the samples in \p Other into this one. 995 /// Optionally scale samples by \p Weight. 996 sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { 997 sampleprof_error Result = sampleprof_error::success; 998 if (!GUIDToFuncNameMap) 999 GUIDToFuncNameMap = Other.GUIDToFuncNameMap; 1000 if (Context.getFunction().empty()) 1001 Context = Other.getContext(); 1002 if (FunctionHash == 0) { 1003 // Set the function hash code for the target profile. 1004 FunctionHash = Other.getFunctionHash(); 1005 } else if (FunctionHash != Other.getFunctionHash()) { 1006 // The two profiles coming with different valid hash codes indicates 1007 // either: 1008 // 1. They are same-named static functions from different compilation 1009 // units (without using -unique-internal-linkage-names), or 1010 // 2. They are really the same function but from different compilations. 1011 // Let's bail out in either case for now, which means one profile is 1012 // dropped. 1013 return sampleprof_error::hash_mismatch; 1014 } 1015 1016 MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); 1017 MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); 1018 for (const auto &I : Other.getBodySamples()) { 1019 const LineLocation &Loc = I.first; 1020 const SampleRecord &Rec = I.second; 1021 MergeResult(Result, BodySamples[Loc].merge(Rec, Weight)); 1022 } 1023 for (const auto &I : Other.getCallsiteSamples()) { 1024 const LineLocation &Loc = I.first; 1025 FunctionSamplesMap &FSMap = functionSamplesAt(Loc); 1026 for (const auto &Rec : I.second) 1027 MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight)); 1028 } 1029 return Result; 1030 } 1031 1032 /// Recursively traverses all children, if the total sample count of the 1033 /// corresponding function is no less than \p Threshold, add its corresponding 1034 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID 1035 /// to \p S. 1036 void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, 1037 const HashKeyMap<std::unordered_map, FunctionId, 1038 Function *> &SymbolMap, 1039 uint64_t Threshold) const { 1040 if (TotalSamples <= Threshold) 1041 return; 1042 auto isDeclaration = [](const Function *F) { 1043 return !F || F->isDeclaration(); 1044 }; 1045 if (isDeclaration(SymbolMap.lookup(getFunction()))) { 1046 // Add to the import list only when it's defined out of module. 1047 S.insert(getGUID()); 1048 } 1049 // Import hot CallTargets, which may not be available in IR because full 1050 // profile annotation cannot be done until backend compilation in ThinLTO. 1051 for (const auto &BS : BodySamples) 1052 for (const auto &TS : BS.second.getCallTargets()) 1053 if (TS.second > Threshold) { 1054 const Function *Callee = SymbolMap.lookup(TS.first); 1055 if (isDeclaration(Callee)) 1056 S.insert(TS.first.getHashCode()); 1057 } 1058 for (const auto &CS : CallsiteSamples) 1059 for (const auto &NameFS : CS.second) 1060 NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold); 1061 } 1062 1063 /// Set the name of the function. 1064 void setFunction(FunctionId newFunction) { 1065 Context.setFunction(newFunction); 1066 } 1067 1068 /// Return the function name. 1069 FunctionId getFunction() const { return Context.getFunction(); } 1070 1071 /// Return the original function name. 1072 StringRef getFuncName() const { return getFuncName(getFunction()); } 1073 1074 void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } 1075 1076 uint64_t getFunctionHash() const { return FunctionHash; } 1077 1078 void setIRToProfileLocationMap(const LocToLocMap *LTLM) { 1079 assert(IRToProfileLocationMap == nullptr && "this should be set only once"); 1080 IRToProfileLocationMap = LTLM; 1081 } 1082 1083 /// Return the canonical name for a function, taking into account 1084 /// suffix elision policy attributes. 1085 static StringRef getCanonicalFnName(const Function &F) { 1086 auto AttrName = "sample-profile-suffix-elision-policy"; 1087 auto Attr = F.getFnAttribute(AttrName).getValueAsString(); 1088 return getCanonicalFnName(F.getName(), Attr); 1089 } 1090 1091 /// Name suffixes which canonicalization should handle to avoid 1092 /// profile mismatch. 1093 static constexpr const char *LLVMSuffix = ".llvm."; 1094 static constexpr const char *PartSuffix = ".part."; 1095 static constexpr const char *UniqSuffix = ".__uniq."; 1096 1097 static StringRef getCanonicalFnName(StringRef FnName, 1098 StringRef Attr = "selected") { 1099 // Note the sequence of the suffixes in the knownSuffixes array matters. 1100 // If suffix "A" is appended after the suffix "B", "A" should be in front 1101 // of "B" in knownSuffixes. 1102 const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; 1103 if (Attr == "" || Attr == "all") { 1104 return FnName.split('.').first; 1105 } else if (Attr == "selected") { 1106 StringRef Cand(FnName); 1107 for (const auto &Suf : knownSuffixes) { 1108 StringRef Suffix(Suf); 1109 // If the profile contains ".__uniq." suffix, don't strip the 1110 // suffix for names in the IR. 1111 if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) 1112 continue; 1113 auto It = Cand.rfind(Suffix); 1114 if (It == StringRef::npos) 1115 continue; 1116 auto Dit = Cand.rfind('.'); 1117 if (Dit == It + Suffix.size() - 1) 1118 Cand = Cand.substr(0, It); 1119 } 1120 return Cand; 1121 } else if (Attr == "none") { 1122 return FnName; 1123 } else { 1124 assert(false && "internal error: unknown suffix elision policy"); 1125 } 1126 return FnName; 1127 } 1128 1129 /// Translate \p Func into its original name. 1130 /// When profile doesn't use MD5, \p Func needs no translation. 1131 /// When profile uses MD5, \p Func in current FunctionSamples 1132 /// is actually GUID of the original function name. getFuncName will 1133 /// translate \p Func in current FunctionSamples into its original name 1134 /// by looking up in the function map GUIDToFuncNameMap. 1135 /// If the original name doesn't exist in the map, return empty StringRef. 1136 StringRef getFuncName(FunctionId Func) const { 1137 if (!UseMD5) 1138 return Func.stringRef(); 1139 1140 assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); 1141 return GUIDToFuncNameMap->lookup(Func.getHashCode()); 1142 } 1143 1144 /// Returns the line offset to the start line of the subprogram. 1145 /// We assume that a single function will not exceed 65535 LOC. 1146 static unsigned getOffset(const DILocation *DIL); 1147 1148 /// Returns a unique call site identifier for a given debug location of a call 1149 /// instruction. This is wrapper of two scenarios, the probe-based profile and 1150 /// regular profile, to hide implementation details from the sample loader and 1151 /// the context tracker. 1152 static LineLocation getCallSiteIdentifier(const DILocation *DIL, 1153 bool ProfileIsFS = false); 1154 1155 /// Returns a unique hash code for a combination of a callsite location and 1156 /// the callee function name. 1157 /// Guarantee MD5 and non-MD5 representation of the same function results in 1158 /// the same hash. 1159 static uint64_t getCallSiteHash(FunctionId Callee, 1160 const LineLocation &Callsite) { 1161 return SampleContextFrame(Callee, Callsite).getHashCode(); 1162 } 1163 1164 /// Get the FunctionSamples of the inline instance where DIL originates 1165 /// from. 1166 /// 1167 /// The FunctionSamples of the instruction (Machine or IR) associated to 1168 /// \p DIL is the inlined instance in which that instruction is coming from. 1169 /// We traverse the inline stack of that instruction, and match it with the 1170 /// tree nodes in the profile. 1171 /// 1172 /// \returns the FunctionSamples pointer to the inlined instance. 1173 /// If \p Remapper is not nullptr, it will be used to find matching 1174 /// FunctionSamples with not exactly the same but equivalent name. 1175 const FunctionSamples *findFunctionSamples( 1176 const DILocation *DIL, 1177 SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; 1178 1179 static bool ProfileIsProbeBased; 1180 1181 static bool ProfileIsCS; 1182 1183 static bool ProfileIsPreInlined; 1184 1185 SampleContext &getContext() const { return Context; } 1186 1187 void setContext(const SampleContext &FContext) { Context = FContext; } 1188 1189 /// Whether the profile uses MD5 to represent string. 1190 static bool UseMD5; 1191 1192 /// Whether the profile contains any ".__uniq." suffix in a name. 1193 static bool HasUniqSuffix; 1194 1195 /// If this profile uses flow sensitive discriminators. 1196 static bool ProfileIsFS; 1197 1198 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for 1199 /// all the function symbols defined or declared in current module. 1200 DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr; 1201 1202 /// Return the GUID of the context's name. If the context is already using 1203 /// MD5, don't hash it again. 1204 uint64_t getGUID() const { 1205 return getFunction().getHashCode(); 1206 } 1207 1208 // Find all the names in the current FunctionSamples including names in 1209 // all the inline instances and names of call targets. 1210 void findAllNames(DenseSet<FunctionId> &NameSet) const; 1211 1212 bool operator==(const FunctionSamples &Other) const { 1213 return (GUIDToFuncNameMap == Other.GUIDToFuncNameMap || 1214 (GUIDToFuncNameMap && Other.GUIDToFuncNameMap && 1215 *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap)) && 1216 FunctionHash == Other.FunctionHash && Context == Other.Context && 1217 TotalSamples == Other.TotalSamples && 1218 TotalHeadSamples == Other.TotalHeadSamples && 1219 BodySamples == Other.BodySamples && 1220 CallsiteSamples == Other.CallsiteSamples; 1221 } 1222 1223 bool operator!=(const FunctionSamples &Other) const { 1224 return !(*this == Other); 1225 } 1226 1227 private: 1228 /// CFG hash value for the function. 1229 uint64_t FunctionHash = 0; 1230 1231 /// Calling context for function profile 1232 mutable SampleContext Context; 1233 1234 /// Total number of samples collected inside this function. 1235 /// 1236 /// Samples are cumulative, they include all the samples collected 1237 /// inside this function and all its inlined callees. 1238 uint64_t TotalSamples = 0; 1239 1240 /// Total number of samples collected at the head of the function. 1241 /// This is an approximation of the number of calls made to this function 1242 /// at runtime. 1243 uint64_t TotalHeadSamples = 0; 1244 1245 /// Map instruction locations to collected samples. 1246 /// 1247 /// Each entry in this map contains the number of samples 1248 /// collected at the corresponding line offset. All line locations 1249 /// are an offset from the start of the function. 1250 BodySampleMap BodySamples; 1251 1252 /// Map call sites to collected samples for the called function. 1253 /// 1254 /// Each entry in this map corresponds to all the samples 1255 /// collected for the inlined function call at the given 1256 /// location. For example, given: 1257 /// 1258 /// void foo() { 1259 /// 1 bar(); 1260 /// ... 1261 /// 8 baz(); 1262 /// } 1263 /// 1264 /// If the bar() and baz() calls were inlined inside foo(), this 1265 /// map will contain two entries. One for all the samples collected 1266 /// in the call to bar() at line offset 1, the other for all the samples 1267 /// collected in the call to baz() at line offset 8. 1268 CallsiteSampleMap CallsiteSamples; 1269 1270 /// IR to profile location map generated by stale profile matching. 1271 /// 1272 /// Each entry is a mapping from the location on current build to the matched 1273 /// location in the "stale" profile. For example: 1274 /// Profiled source code: 1275 /// void foo() { 1276 /// 1 bar(); 1277 /// } 1278 /// 1279 /// Current source code: 1280 /// void foo() { 1281 /// 1 // Code change 1282 /// 2 bar(); 1283 /// } 1284 /// Supposing the stale profile matching algorithm generated the mapping [2 -> 1285 /// 1], the profile query using the location of bar on the IR which is 2 will 1286 /// be remapped to 1 and find the location of bar in the profile. 1287 const LocToLocMap *IRToProfileLocationMap = nullptr; 1288 }; 1289 1290 /// Get the proper representation of a string according to whether the 1291 /// current Format uses MD5 to represent the string. 1292 static inline FunctionId getRepInFormat(StringRef Name) { 1293 if (Name.empty() || !FunctionSamples::UseMD5) 1294 return FunctionId(Name); 1295 return FunctionId(Function::getGUID(Name)); 1296 } 1297 1298 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); 1299 1300 /// This class provides operator overloads to the map container using MD5 as the 1301 /// key type, so that existing code can still work in most cases using 1302 /// SampleContext as key. 1303 /// Note: when populating container, make sure to assign the SampleContext to 1304 /// the mapped value immediately because the key no longer holds it. 1305 class SampleProfileMap 1306 : public HashKeyMap<std::unordered_map, SampleContext, FunctionSamples> { 1307 public: 1308 // Convenience method because this is being used in many places. Set the 1309 // FunctionSamples' context if its newly inserted. 1310 mapped_type &Create(const SampleContext &Ctx) { 1311 auto Ret = try_emplace(Ctx, FunctionSamples()); 1312 if (Ret.second) 1313 Ret.first->second.setContext(Ctx); 1314 return Ret.first->second; 1315 } 1316 1317 iterator find(const SampleContext &Ctx) { 1318 return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::find( 1319 Ctx); 1320 } 1321 1322 const_iterator find(const SampleContext &Ctx) const { 1323 return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::find( 1324 Ctx); 1325 } 1326 1327 size_t erase(const SampleContext &Ctx) { 1328 return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>:: 1329 erase(Ctx); 1330 } 1331 1332 size_t erase(const key_type &Key) { return base_type::erase(Key); } 1333 1334 iterator erase(iterator It) { return base_type::erase(It); } 1335 }; 1336 1337 using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>; 1338 1339 void sortFuncProfiles(const SampleProfileMap &ProfileMap, 1340 std::vector<NameFunctionSamples> &SortedProfiles); 1341 1342 /// Sort a LocationT->SampleT map by LocationT. 1343 /// 1344 /// It produces a sorted list of <LocationT, SampleT> records by ascending 1345 /// order of LocationT. 1346 template <class LocationT, class SampleT> class SampleSorter { 1347 public: 1348 using SamplesWithLoc = std::pair<const LocationT, SampleT>; 1349 using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>; 1350 1351 SampleSorter(const std::map<LocationT, SampleT> &Samples) { 1352 for (const auto &I : Samples) 1353 V.push_back(&I); 1354 llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) { 1355 return A->first < B->first; 1356 }); 1357 } 1358 1359 const SamplesWithLocList &get() const { return V; } 1360 1361 private: 1362 SamplesWithLocList V; 1363 }; 1364 1365 /// SampleContextTrimmer impelements helper functions to trim, merge cold 1366 /// context profiles. It also supports context profile canonicalization to make 1367 /// sure ProfileMap's key is consistent with FunctionSample's name/context. 1368 class SampleContextTrimmer { 1369 public: 1370 SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; 1371 // Trim and merge cold context profile when requested. TrimBaseProfileOnly 1372 // should only be effective when TrimColdContext is true. On top of 1373 // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all 1374 // cold profiles or only cold base profiles. Trimming base profiles only is 1375 // mainly to honor the preinliner decsion. Note that when MergeColdContext is 1376 // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will 1377 // be ignored. 1378 void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, 1379 bool TrimColdContext, 1380 bool MergeColdContext, 1381 uint32_t ColdContextFrameLength, 1382 bool TrimBaseProfileOnly); 1383 1384 private: 1385 SampleProfileMap &ProfileMap; 1386 }; 1387 1388 /// Helper class for profile conversion. 1389 /// 1390 /// It supports full context-sensitive profile to nested profile conversion, 1391 /// nested profile to flatten profile conversion, etc. 1392 class ProfileConverter { 1393 public: 1394 ProfileConverter(SampleProfileMap &Profiles); 1395 // Convert a full context-sensitive flat sample profile into a nested sample 1396 // profile. 1397 void convertCSProfiles(); 1398 struct FrameNode { 1399 FrameNode(FunctionId FName = FunctionId(), 1400 FunctionSamples *FSamples = nullptr, 1401 LineLocation CallLoc = {0, 0}) 1402 : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; 1403 1404 // Map line+discriminator location to child frame 1405 std::map<uint64_t, FrameNode> AllChildFrames; 1406 // Function name for current frame 1407 FunctionId FuncName; 1408 // Function Samples for current frame 1409 FunctionSamples *FuncSamples; 1410 // Callsite location in parent context 1411 LineLocation CallSiteLoc; 1412 1413 FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, 1414 FunctionId CalleeName); 1415 }; 1416 1417 static void flattenProfile(SampleProfileMap &ProfileMap, 1418 bool ProfileIsCS = false) { 1419 SampleProfileMap TmpProfiles; 1420 flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS); 1421 ProfileMap = std::move(TmpProfiles); 1422 } 1423 1424 static void flattenProfile(const SampleProfileMap &InputProfiles, 1425 SampleProfileMap &OutputProfiles, 1426 bool ProfileIsCS = false) { 1427 if (ProfileIsCS) { 1428 for (const auto &I : InputProfiles) { 1429 // Retain the profile name and clear the full context for each function 1430 // profile. 1431 FunctionSamples &FS = OutputProfiles.Create(I.second.getFunction()); 1432 FS.merge(I.second); 1433 } 1434 } else { 1435 for (const auto &I : InputProfiles) 1436 flattenNestedProfile(OutputProfiles, I.second); 1437 } 1438 } 1439 1440 private: 1441 static void flattenNestedProfile(SampleProfileMap &OutputProfiles, 1442 const FunctionSamples &FS) { 1443 // To retain the context, checksum, attributes of the original profile, make 1444 // a copy of it if no profile is found. 1445 SampleContext &Context = FS.getContext(); 1446 auto Ret = OutputProfiles.try_emplace(Context, FS); 1447 FunctionSamples &Profile = Ret.first->second; 1448 if (Ret.second) { 1449 // Clear nested inlinees' samples for the flattened copy. These inlinees 1450 // will have their own top-level entries after flattening. 1451 Profile.removeAllCallsiteSamples(); 1452 // We recompute TotalSamples later, so here set to zero. 1453 Profile.setTotalSamples(0); 1454 } else { 1455 for (const auto &[LineLocation, SampleRecord] : FS.getBodySamples()) { 1456 Profile.addSampleRecord(LineLocation, SampleRecord); 1457 } 1458 } 1459 1460 assert(Profile.getCallsiteSamples().empty() && 1461 "There should be no inlinees' profiles after flattening."); 1462 1463 // TotalSamples might not be equal to the sum of all samples from 1464 // BodySamples and CallsiteSamples. So here we use "TotalSamples = 1465 // Original_TotalSamples - All_of_Callsite_TotalSamples + 1466 // All_of_Callsite_HeadSamples" to compute the new TotalSamples. 1467 uint64_t TotalSamples = FS.getTotalSamples(); 1468 1469 for (const auto &I : FS.getCallsiteSamples()) { 1470 for (const auto &Callee : I.second) { 1471 const auto &CalleeProfile = Callee.second; 1472 // Add body sample. 1473 Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator, 1474 CalleeProfile.getHeadSamplesEstimate()); 1475 // Add callsite sample. 1476 Profile.addCalledTargetSamples( 1477 I.first.LineOffset, I.first.Discriminator, 1478 CalleeProfile.getFunction(), 1479 CalleeProfile.getHeadSamplesEstimate()); 1480 // Update total samples. 1481 TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples() 1482 ? TotalSamples - CalleeProfile.getTotalSamples() 1483 : 0; 1484 TotalSamples += CalleeProfile.getHeadSamplesEstimate(); 1485 // Recursively convert callee profile. 1486 flattenNestedProfile(OutputProfiles, CalleeProfile); 1487 } 1488 } 1489 Profile.addTotalSamples(TotalSamples); 1490 1491 Profile.setHeadSamples(Profile.getHeadSamplesEstimate()); 1492 } 1493 1494 // Nest all children profiles into the profile of Node. 1495 void convertCSProfiles(FrameNode &Node); 1496 FrameNode *getOrCreateContextPath(const SampleContext &Context); 1497 1498 SampleProfileMap &ProfileMap; 1499 FrameNode RootFrame; 1500 }; 1501 1502 /// ProfileSymbolList records the list of function symbols shown up 1503 /// in the binary used to generate the profile. It is useful to 1504 /// to discriminate a function being so cold as not to shown up 1505 /// in the profile and a function newly added. 1506 class ProfileSymbolList { 1507 public: 1508 /// copy indicates whether we need to copy the underlying memory 1509 /// for the input Name. 1510 void add(StringRef Name, bool copy = false) { 1511 if (!copy) { 1512 Syms.insert(Name); 1513 return; 1514 } 1515 Syms.insert(Name.copy(Allocator)); 1516 } 1517 1518 bool contains(StringRef Name) { return Syms.count(Name); } 1519 1520 void merge(const ProfileSymbolList &List) { 1521 for (auto Sym : List.Syms) 1522 add(Sym, true); 1523 } 1524 1525 unsigned size() { return Syms.size(); } 1526 1527 void setToCompress(bool TC) { ToCompress = TC; } 1528 bool toCompress() { return ToCompress; } 1529 1530 std::error_code read(const uint8_t *Data, uint64_t ListSize); 1531 std::error_code write(raw_ostream &OS); 1532 void dump(raw_ostream &OS = dbgs()) const; 1533 1534 private: 1535 // Determine whether or not to compress the symbol list when 1536 // writing it into profile. The variable is unused when the symbol 1537 // list is read from an existing profile. 1538 bool ToCompress = false; 1539 DenseSet<StringRef> Syms; 1540 BumpPtrAllocator Allocator; 1541 }; 1542 1543 } // end namespace sampleprof 1544 1545 using namespace sampleprof; 1546 // Provide DenseMapInfo for SampleContext. 1547 template <> struct DenseMapInfo<SampleContext> { 1548 static inline SampleContext getEmptyKey() { return SampleContext(); } 1549 1550 static inline SampleContext getTombstoneKey() { 1551 return SampleContext(FunctionId(~1ULL)); 1552 } 1553 1554 static unsigned getHashValue(const SampleContext &Val) { 1555 return Val.getHashCode(); 1556 } 1557 1558 static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { 1559 return LHS == RHS; 1560 } 1561 }; 1562 1563 // Prepend "__uniq" before the hash for tools like profilers to understand 1564 // that this symbol is of internal linkage type. The "__uniq" is the 1565 // pre-determined prefix that is used to tell tools that this symbol was 1566 // created with -funique-internal-linkage-symbols and the tools can strip or 1567 // keep the prefix as needed. 1568 inline std::string getUniqueInternalLinkagePostfix(const StringRef &FName) { 1569 llvm::MD5 Md5; 1570 Md5.update(FName); 1571 llvm::MD5::MD5Result R; 1572 Md5.final(R); 1573 SmallString<32> Str; 1574 llvm::MD5::stringifyResult(R, Str); 1575 // Convert MD5hash to Decimal. Demangler suffixes can either contain 1576 // numbers or characters but not both. 1577 llvm::APInt IntHash(128, Str.str(), 16); 1578 return toString(IntHash, /* Radix = */ 10, /* Signed = */ false) 1579 .insert(0, FunctionSamples::UniqSuffix); 1580 } 1581 1582 } // end namespace llvm 1583 1584 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H 1585