1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains common definitions used in the reading and writing of 10 // sample profile data. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H 15 #define LLVM_PROFILEDATA_SAMPLEPROF_H 16 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringMap.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/IR/Function.h" 23 #include "llvm/IR/GlobalValue.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/ErrorOr.h" 27 #include "llvm/Support/MathExtras.h" 28 #include <algorithm> 29 #include <cstdint> 30 #include <list> 31 #include <map> 32 #include <set> 33 #include <sstream> 34 #include <string> 35 #include <system_error> 36 #include <unordered_map> 37 #include <utility> 38 39 namespace llvm { 40 41 class DILocation; 42 class raw_ostream; 43 44 const std::error_category &sampleprof_category(); 45 46 enum class sampleprof_error { 47 success = 0, 48 bad_magic, 49 unsupported_version, 50 too_large, 51 truncated, 52 malformed, 53 unrecognized_format, 54 unsupported_writing_format, 55 truncated_name_table, 56 not_implemented, 57 counter_overflow, 58 ostream_seek_unsupported, 59 uncompress_failed, 60 zlib_unavailable, 61 hash_mismatch 62 }; 63 64 inline std::error_code make_error_code(sampleprof_error E) { 65 return std::error_code(static_cast<int>(E), sampleprof_category()); 66 } 67 68 inline sampleprof_error MergeResult(sampleprof_error &Accumulator, 69 sampleprof_error Result) { 70 // Prefer first error encountered as later errors may be secondary effects of 71 // the initial problem. 72 if (Accumulator == sampleprof_error::success && 73 Result != sampleprof_error::success) 74 Accumulator = Result; 75 return Accumulator; 76 } 77 78 } // end namespace llvm 79 80 namespace std { 81 82 template <> 83 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {}; 84 85 } // end namespace std 86 87 namespace llvm { 88 namespace sampleprof { 89 90 enum SampleProfileFormat { 91 SPF_None = 0, 92 SPF_Text = 0x1, 93 SPF_Compact_Binary = 0x2, // Deprecated 94 SPF_GCC = 0x3, 95 SPF_Ext_Binary = 0x4, 96 SPF_Binary = 0xff 97 }; 98 99 enum SampleProfileLayout { 100 SPL_None = 0, 101 SPL_Nest = 0x1, 102 SPL_Flat = 0x2, 103 }; 104 105 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { 106 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | 107 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | 108 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) | 109 uint64_t('2') << (64 - 56) | uint64_t(Format); 110 } 111 112 /// Get the proper representation of a string according to whether the 113 /// current Format uses MD5 to represent the string. 114 static inline StringRef getRepInFormat(StringRef Name, bool UseMD5, 115 std::string &GUIDBuf) { 116 if (Name.empty() || !UseMD5) 117 return Name; 118 GUIDBuf = std::to_string(Function::getGUID(Name)); 119 return GUIDBuf; 120 } 121 122 static inline uint64_t SPVersion() { return 103; } 123 124 // Section Type used by SampleProfileExtBinaryBaseReader and 125 // SampleProfileExtBinaryBaseWriter. Never change the existing 126 // value of enum. Only append new ones. 127 enum SecType { 128 SecInValid = 0, 129 SecProfSummary = 1, 130 SecNameTable = 2, 131 SecProfileSymbolList = 3, 132 SecFuncOffsetTable = 4, 133 SecFuncMetadata = 5, 134 SecCSNameTable = 6, 135 // marker for the first type of profile. 136 SecFuncProfileFirst = 32, 137 SecLBRProfile = SecFuncProfileFirst 138 }; 139 140 static inline std::string getSecName(SecType Type) { 141 switch ((int)Type) { // Avoid -Wcovered-switch-default 142 case SecInValid: 143 return "InvalidSection"; 144 case SecProfSummary: 145 return "ProfileSummarySection"; 146 case SecNameTable: 147 return "NameTableSection"; 148 case SecProfileSymbolList: 149 return "ProfileSymbolListSection"; 150 case SecFuncOffsetTable: 151 return "FuncOffsetTableSection"; 152 case SecFuncMetadata: 153 return "FunctionMetadata"; 154 case SecCSNameTable: 155 return "CSNameTableSection"; 156 case SecLBRProfile: 157 return "LBRProfileSection"; 158 default: 159 return "UnknownSection"; 160 } 161 } 162 163 // Entry type of section header table used by SampleProfileExtBinaryBaseReader 164 // and SampleProfileExtBinaryBaseWriter. 165 struct SecHdrTableEntry { 166 SecType Type; 167 uint64_t Flags; 168 uint64_t Offset; 169 uint64_t Size; 170 // The index indicating the location of the current entry in 171 // SectionHdrLayout table. 172 uint64_t LayoutIndex; 173 }; 174 175 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags, 176 // common flags will be saved in the lower 32bits and section specific flags 177 // will be saved in the higher 32 bits. 178 enum class SecCommonFlags : uint32_t { 179 SecFlagInValid = 0, 180 SecFlagCompress = (1 << 0), 181 // Indicate the section contains only profile without context. 182 SecFlagFlat = (1 << 1) 183 }; 184 185 // Section specific flags are defined here. 186 // !!!Note: Everytime a new enum class is created here, please add 187 // a new check in verifySecFlag. 188 enum class SecNameTableFlags : uint32_t { 189 SecFlagInValid = 0, 190 SecFlagMD5Name = (1 << 0), 191 // Store MD5 in fixed length instead of ULEB128 so NameTable can be 192 // accessed like an array. 193 SecFlagFixedLengthMD5 = (1 << 1), 194 // Profile contains ".__uniq." suffix name. Compiler shouldn't strip 195 // the suffix when doing profile matching when seeing the flag. 196 SecFlagUniqSuffix = (1 << 2) 197 }; 198 enum class SecProfSummaryFlags : uint32_t { 199 SecFlagInValid = 0, 200 /// SecFlagPartial means the profile is for common/shared code. 201 /// The common profile is usually merged from profiles collected 202 /// from running other targets. 203 SecFlagPartial = (1 << 0), 204 /// SecFlagContext means this is context-sensitive flat profile for 205 /// CSSPGO 206 SecFlagFullContext = (1 << 1), 207 /// SecFlagFSDiscriminator means this profile uses flow-sensitive 208 /// discriminators. 209 SecFlagFSDiscriminator = (1 << 2), 210 /// SecFlagIsPreInlined means this profile contains ShouldBeInlined 211 /// contexts thus this is CS preinliner computed. 212 SecFlagIsPreInlined = (1 << 4), 213 }; 214 215 enum class SecFuncMetadataFlags : uint32_t { 216 SecFlagInvalid = 0, 217 SecFlagIsProbeBased = (1 << 0), 218 SecFlagHasAttribute = (1 << 1), 219 }; 220 221 enum class SecFuncOffsetFlags : uint32_t { 222 SecFlagInvalid = 0, 223 // Store function offsets in an order of contexts. The order ensures that 224 // callee contexts of a given context laid out next to it. 225 SecFlagOrdered = (1 << 0), 226 }; 227 228 // Verify section specific flag is used for the correct section. 229 template <class SecFlagType> 230 static inline void verifySecFlag(SecType Type, SecFlagType Flag) { 231 // No verification is needed for common flags. 232 if (std::is_same<SecCommonFlags, SecFlagType>()) 233 return; 234 235 // Verification starts here for section specific flag. 236 bool IsFlagLegal = false; 237 switch (Type) { 238 case SecNameTable: 239 IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>(); 240 break; 241 case SecProfSummary: 242 IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>(); 243 break; 244 case SecFuncMetadata: 245 IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); 246 break; 247 default: 248 case SecFuncOffsetTable: 249 IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>(); 250 break; 251 } 252 if (!IsFlagLegal) 253 llvm_unreachable("Misuse of a flag in an incompatible section"); 254 } 255 256 template <class SecFlagType> 257 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { 258 verifySecFlag(Entry.Type, Flag); 259 auto FVal = static_cast<uint64_t>(Flag); 260 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 261 Entry.Flags |= IsCommon ? FVal : (FVal << 32); 262 } 263 264 template <class SecFlagType> 265 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { 266 verifySecFlag(Entry.Type, Flag); 267 auto FVal = static_cast<uint64_t>(Flag); 268 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 269 Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32)); 270 } 271 272 template <class SecFlagType> 273 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) { 274 verifySecFlag(Entry.Type, Flag); 275 auto FVal = static_cast<uint64_t>(Flag); 276 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 277 return Entry.Flags & (IsCommon ? FVal : (FVal << 32)); 278 } 279 280 /// Represents the relative location of an instruction. 281 /// 282 /// Instruction locations are specified by the line offset from the 283 /// beginning of the function (marked by the line where the function 284 /// header is) and the discriminator value within that line. 285 /// 286 /// The discriminator value is useful to distinguish instructions 287 /// that are on the same line but belong to different basic blocks 288 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). 289 struct LineLocation { 290 LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} 291 292 void print(raw_ostream &OS) const; 293 void dump() const; 294 295 bool operator<(const LineLocation &O) const { 296 return LineOffset < O.LineOffset || 297 (LineOffset == O.LineOffset && Discriminator < O.Discriminator); 298 } 299 300 bool operator==(const LineLocation &O) const { 301 return LineOffset == O.LineOffset && Discriminator == O.Discriminator; 302 } 303 304 bool operator!=(const LineLocation &O) const { 305 return LineOffset != O.LineOffset || Discriminator != O.Discriminator; 306 } 307 308 uint32_t LineOffset; 309 uint32_t Discriminator; 310 }; 311 312 struct LineLocationHash { 313 uint64_t operator()(const LineLocation &Loc) const { 314 return std::hash<std::uint64_t>{}((((uint64_t)Loc.LineOffset) << 32) | 315 Loc.Discriminator); 316 } 317 }; 318 319 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); 320 321 /// Representation of a single sample record. 322 /// 323 /// A sample record is represented by a positive integer value, which 324 /// indicates how frequently was the associated line location executed. 325 /// 326 /// Additionally, if the associated location contains a function call, 327 /// the record will hold a list of all the possible called targets. For 328 /// direct calls, this will be the exact function being invoked. For 329 /// indirect calls (function pointers, virtual table dispatch), this 330 /// will be a list of one or more functions. 331 class SampleRecord { 332 public: 333 using CallTarget = std::pair<StringRef, uint64_t>; 334 struct CallTargetComparator { 335 bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { 336 if (LHS.second != RHS.second) 337 return LHS.second > RHS.second; 338 339 return LHS.first < RHS.first; 340 } 341 }; 342 343 using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>; 344 using CallTargetMap = StringMap<uint64_t>; 345 SampleRecord() = default; 346 347 /// Increment the number of samples for this record by \p S. 348 /// Optionally scale sample count \p S by \p Weight. 349 /// 350 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 351 /// around unsigned integers. 352 sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { 353 bool Overflowed; 354 NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); 355 return Overflowed ? sampleprof_error::counter_overflow 356 : sampleprof_error::success; 357 } 358 359 /// Decrease the number of samples for this record by \p S. Return the amout 360 /// of samples actually decreased. 361 uint64_t removeSamples(uint64_t S) { 362 if (S > NumSamples) 363 S = NumSamples; 364 NumSamples -= S; 365 return S; 366 } 367 368 /// Add called function \p F with samples \p S. 369 /// Optionally scale sample count \p S by \p Weight. 370 /// 371 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 372 /// around unsigned integers. 373 sampleprof_error addCalledTarget(StringRef F, uint64_t S, 374 uint64_t Weight = 1) { 375 uint64_t &TargetSamples = CallTargets[F]; 376 bool Overflowed; 377 TargetSamples = 378 SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); 379 return Overflowed ? sampleprof_error::counter_overflow 380 : sampleprof_error::success; 381 } 382 383 /// Remove called function from the call target map. Return the target sample 384 /// count of the called function. 385 uint64_t removeCalledTarget(StringRef F) { 386 uint64_t Count = 0; 387 auto I = CallTargets.find(F); 388 if (I != CallTargets.end()) { 389 Count = I->second; 390 CallTargets.erase(I); 391 } 392 return Count; 393 } 394 395 /// Return true if this sample record contains function calls. 396 bool hasCalls() const { return !CallTargets.empty(); } 397 398 uint64_t getSamples() const { return NumSamples; } 399 const CallTargetMap &getCallTargets() const { return CallTargets; } 400 const SortedCallTargetSet getSortedCallTargets() const { 401 return SortCallTargets(CallTargets); 402 } 403 404 uint64_t getCallTargetSum() const { 405 uint64_t Sum = 0; 406 for (const auto &I : CallTargets) 407 Sum += I.second; 408 return Sum; 409 } 410 411 /// Sort call targets in descending order of call frequency. 412 static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { 413 SortedCallTargetSet SortedTargets; 414 for (const auto &[Target, Frequency] : Targets) { 415 SortedTargets.emplace(Target, Frequency); 416 } 417 return SortedTargets; 418 } 419 420 /// Prorate call targets by a distribution factor. 421 static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, 422 float DistributionFactor) { 423 CallTargetMap AdjustedTargets; 424 for (const auto &[Target, Frequency] : Targets) { 425 AdjustedTargets[Target] = Frequency * DistributionFactor; 426 } 427 return AdjustedTargets; 428 } 429 430 /// Merge the samples in \p Other into this record. 431 /// Optionally scale sample counts by \p Weight. 432 sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1); 433 void print(raw_ostream &OS, unsigned Indent) const; 434 void dump() const; 435 436 bool operator==(const SampleRecord &Other) const { 437 return NumSamples == Other.NumSamples && CallTargets == Other.CallTargets; 438 } 439 440 bool operator!=(const SampleRecord &Other) const { 441 return !(*this == Other); 442 } 443 444 private: 445 uint64_t NumSamples = 0; 446 CallTargetMap CallTargets; 447 }; 448 449 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); 450 451 // State of context associated with FunctionSamples 452 enum ContextStateMask { 453 UnknownContext = 0x0, // Profile without context 454 RawContext = 0x1, // Full context profile from input profile 455 SyntheticContext = 0x2, // Synthetic context created for context promotion 456 InlinedContext = 0x4, // Profile for context that is inlined into caller 457 MergedContext = 0x8 // Profile for context merged into base profile 458 }; 459 460 // Attribute of context associated with FunctionSamples 461 enum ContextAttributeMask { 462 ContextNone = 0x0, 463 ContextWasInlined = 0x1, // Leaf of context was inlined in previous build 464 ContextShouldBeInlined = 0x2, // Leaf of context should be inlined 465 ContextDuplicatedIntoBase = 466 0x4, // Leaf of context is duplicated into the base profile 467 }; 468 469 // Represents a context frame with function name and line location 470 struct SampleContextFrame { 471 StringRef FuncName; 472 LineLocation Location; 473 474 SampleContextFrame() : Location(0, 0) {} 475 476 SampleContextFrame(StringRef FuncName, LineLocation Location) 477 : FuncName(FuncName), Location(Location) {} 478 479 bool operator==(const SampleContextFrame &That) const { 480 return Location == That.Location && FuncName == That.FuncName; 481 } 482 483 bool operator!=(const SampleContextFrame &That) const { 484 return !(*this == That); 485 } 486 487 std::string toString(bool OutputLineLocation) const { 488 std::ostringstream OContextStr; 489 OContextStr << FuncName.str(); 490 if (OutputLineLocation) { 491 OContextStr << ":" << Location.LineOffset; 492 if (Location.Discriminator) 493 OContextStr << "." << Location.Discriminator; 494 } 495 return OContextStr.str(); 496 } 497 }; 498 499 static inline hash_code hash_value(const SampleContextFrame &arg) { 500 return hash_combine(arg.FuncName, arg.Location.LineOffset, 501 arg.Location.Discriminator); 502 } 503 504 using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>; 505 using SampleContextFrames = ArrayRef<SampleContextFrame>; 506 507 struct SampleContextFrameHash { 508 uint64_t operator()(const SampleContextFrameVector &S) const { 509 return hash_combine_range(S.begin(), S.end()); 510 } 511 }; 512 513 // Sample context for FunctionSamples. It consists of the calling context, 514 // the function name and context state. Internally sample context is represented 515 // using ArrayRef, which is also the input for constructing a `SampleContext`. 516 // It can accept and represent both full context string as well as context-less 517 // function name. 518 // For a CS profile, a full context vector can look like: 519 // `main:3 _Z5funcAi:1 _Z8funcLeafi` 520 // For a base CS profile without calling context, the context vector should only 521 // contain the leaf frame name. 522 // For a non-CS profile, the context vector should be empty. 523 class SampleContext { 524 public: 525 SampleContext() : State(UnknownContext), Attributes(ContextNone) {} 526 527 SampleContext(StringRef Name) 528 : Name(Name), State(UnknownContext), Attributes(ContextNone) {} 529 530 SampleContext(SampleContextFrames Context, 531 ContextStateMask CState = RawContext) 532 : Attributes(ContextNone) { 533 assert(!Context.empty() && "Context is empty"); 534 setContext(Context, CState); 535 } 536 537 // Give a context string, decode and populate internal states like 538 // Function name, Calling context and context state. Example of input 539 // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` 540 SampleContext(StringRef ContextStr, 541 std::list<SampleContextFrameVector> &CSNameTable, 542 ContextStateMask CState = RawContext) 543 : Attributes(ContextNone) { 544 assert(!ContextStr.empty()); 545 // Note that `[]` wrapped input indicates a full context string, otherwise 546 // it's treated as context-less function name only. 547 bool HasContext = ContextStr.startswith("["); 548 if (!HasContext) { 549 State = UnknownContext; 550 Name = ContextStr; 551 } else { 552 CSNameTable.emplace_back(); 553 SampleContextFrameVector &Context = CSNameTable.back(); 554 createCtxVectorFromStr(ContextStr, Context); 555 setContext(Context, CState); 556 } 557 } 558 559 /// Create a context vector from a given context string and save it in 560 /// `Context`. 561 static void createCtxVectorFromStr(StringRef ContextStr, 562 SampleContextFrameVector &Context) { 563 // Remove encapsulating '[' and ']' if any 564 ContextStr = ContextStr.substr(1, ContextStr.size() - 2); 565 StringRef ContextRemain = ContextStr; 566 StringRef ChildContext; 567 StringRef CalleeName; 568 while (!ContextRemain.empty()) { 569 auto ContextSplit = ContextRemain.split(" @ "); 570 ChildContext = ContextSplit.first; 571 ContextRemain = ContextSplit.second; 572 LineLocation CallSiteLoc(0, 0); 573 decodeContextString(ChildContext, CalleeName, CallSiteLoc); 574 Context.emplace_back(CalleeName, CallSiteLoc); 575 } 576 } 577 578 // Decode context string for a frame to get function name and location. 579 // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. 580 static void decodeContextString(StringRef ContextStr, StringRef &FName, 581 LineLocation &LineLoc) { 582 // Get function name 583 auto EntrySplit = ContextStr.split(':'); 584 FName = EntrySplit.first; 585 586 LineLoc = {0, 0}; 587 if (!EntrySplit.second.empty()) { 588 // Get line offset, use signed int for getAsInteger so string will 589 // be parsed as signed. 590 int LineOffset = 0; 591 auto LocSplit = EntrySplit.second.split('.'); 592 LocSplit.first.getAsInteger(10, LineOffset); 593 LineLoc.LineOffset = LineOffset; 594 595 // Get discriminator 596 if (!LocSplit.second.empty()) 597 LocSplit.second.getAsInteger(10, LineLoc.Discriminator); 598 } 599 } 600 601 operator SampleContextFrames() const { return FullContext; } 602 bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } 603 void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } 604 uint32_t getAllAttributes() { return Attributes; } 605 void setAllAttributes(uint32_t A) { Attributes = A; } 606 bool hasState(ContextStateMask S) { return State & (uint32_t)S; } 607 void setState(ContextStateMask S) { State |= (uint32_t)S; } 608 void clearState(ContextStateMask S) { State &= (uint32_t)~S; } 609 bool hasContext() const { return State != UnknownContext; } 610 bool isBaseContext() const { return FullContext.size() == 1; } 611 StringRef getName() const { return Name; } 612 SampleContextFrames getContextFrames() const { return FullContext; } 613 614 static std::string getContextString(SampleContextFrames Context, 615 bool IncludeLeafLineLocation = false) { 616 std::ostringstream OContextStr; 617 for (uint32_t I = 0; I < Context.size(); I++) { 618 if (OContextStr.str().size()) { 619 OContextStr << " @ "; 620 } 621 OContextStr << Context[I].toString(I != Context.size() - 1 || 622 IncludeLeafLineLocation); 623 } 624 return OContextStr.str(); 625 } 626 627 std::string toString() const { 628 if (!hasContext()) 629 return Name.str(); 630 return getContextString(FullContext, false); 631 } 632 633 uint64_t getHashCode() const { 634 return hasContext() ? hash_value(getContextFrames()) 635 : hash_value(getName()); 636 } 637 638 /// Set the name of the function and clear the current context. 639 void setName(StringRef FunctionName) { 640 Name = FunctionName; 641 FullContext = SampleContextFrames(); 642 State = UnknownContext; 643 } 644 645 void setContext(SampleContextFrames Context, 646 ContextStateMask CState = RawContext) { 647 assert(CState != UnknownContext); 648 FullContext = Context; 649 Name = Context.back().FuncName; 650 State = CState; 651 } 652 653 bool operator==(const SampleContext &That) const { 654 return State == That.State && Name == That.Name && 655 FullContext == That.FullContext; 656 } 657 658 bool operator!=(const SampleContext &That) const { return !(*this == That); } 659 660 bool operator<(const SampleContext &That) const { 661 if (State != That.State) 662 return State < That.State; 663 664 if (!hasContext()) { 665 return Name < That.Name; 666 } 667 668 uint64_t I = 0; 669 while (I < std::min(FullContext.size(), That.FullContext.size())) { 670 auto &Context1 = FullContext[I]; 671 auto &Context2 = That.FullContext[I]; 672 auto V = Context1.FuncName.compare(Context2.FuncName); 673 if (V) 674 return V < 0; 675 if (Context1.Location != Context2.Location) 676 return Context1.Location < Context2.Location; 677 I++; 678 } 679 680 return FullContext.size() < That.FullContext.size(); 681 } 682 683 struct Hash { 684 uint64_t operator()(const SampleContext &Context) const { 685 return Context.getHashCode(); 686 } 687 }; 688 689 bool IsPrefixOf(const SampleContext &That) const { 690 auto ThisContext = FullContext; 691 auto ThatContext = That.FullContext; 692 if (ThatContext.size() < ThisContext.size()) 693 return false; 694 ThatContext = ThatContext.take_front(ThisContext.size()); 695 // Compare Leaf frame first 696 if (ThisContext.back().FuncName != ThatContext.back().FuncName) 697 return false; 698 // Compare leading context 699 return ThisContext.drop_back() == ThatContext.drop_back(); 700 } 701 702 private: 703 /// Mangled name of the function. 704 StringRef Name; 705 // Full context including calling context and leaf function name 706 SampleContextFrames FullContext; 707 // State of the associated sample profile 708 uint32_t State; 709 // Attribute of the associated sample profile 710 uint32_t Attributes; 711 }; 712 713 static inline hash_code hash_value(const SampleContext &arg) { 714 return arg.hasContext() ? hash_value(arg.getContextFrames()) 715 : hash_value(arg.getName()); 716 } 717 718 class FunctionSamples; 719 class SampleProfileReaderItaniumRemapper; 720 721 using BodySampleMap = std::map<LineLocation, SampleRecord>; 722 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more 723 // memory, which is *very* significant for large profiles. 724 using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>; 725 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; 726 using LocToLocMap = 727 std::unordered_map<LineLocation, LineLocation, LineLocationHash>; 728 729 /// Representation of the samples collected for a function. 730 /// 731 /// This data structure contains all the collected samples for the body 732 /// of a function. Each sample corresponds to a LineLocation instance 733 /// within the body of the function. 734 class FunctionSamples { 735 public: 736 FunctionSamples() = default; 737 738 void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; 739 void dump() const; 740 741 sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { 742 bool Overflowed; 743 TotalSamples = 744 SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); 745 return Overflowed ? sampleprof_error::counter_overflow 746 : sampleprof_error::success; 747 } 748 749 void removeTotalSamples(uint64_t Num) { 750 if (TotalSamples < Num) 751 TotalSamples = 0; 752 else 753 TotalSamples -= Num; 754 } 755 756 void setTotalSamples(uint64_t Num) { TotalSamples = Num; } 757 758 void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; } 759 760 sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { 761 bool Overflowed; 762 TotalHeadSamples = 763 SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); 764 return Overflowed ? sampleprof_error::counter_overflow 765 : sampleprof_error::success; 766 } 767 768 sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, 769 uint64_t Num, uint64_t Weight = 1) { 770 return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( 771 Num, Weight); 772 } 773 774 sampleprof_error addCalledTargetSamples(uint32_t LineOffset, 775 uint32_t Discriminator, 776 StringRef FName, uint64_t Num, 777 uint64_t Weight = 1) { 778 return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( 779 FName, Num, Weight); 780 } 781 782 sampleprof_error addSampleRecord(LineLocation Location, 783 const SampleRecord &SampleRecord, uint64_t Weight = 1) { 784 return BodySamples[Location].merge(SampleRecord, Weight); 785 } 786 787 // Remove a call target and decrease the body sample correspondingly. Return 788 // the number of body samples actually decreased. 789 uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, 790 uint32_t Discriminator, 791 StringRef FName) { 792 uint64_t Count = 0; 793 auto I = BodySamples.find(LineLocation(LineOffset, Discriminator)); 794 if (I != BodySamples.end()) { 795 Count = I->second.removeCalledTarget(FName); 796 Count = I->second.removeSamples(Count); 797 if (!I->second.getSamples()) 798 BodySamples.erase(I); 799 } 800 return Count; 801 } 802 803 // Remove all call site samples for inlinees. This is needed when flattening 804 // a nested profile. 805 void removeAllCallsiteSamples() { 806 CallsiteSamples.clear(); 807 } 808 809 // Accumulate all call target samples to update the body samples. 810 void updateCallsiteSamples() { 811 for (auto &I : BodySamples) { 812 uint64_t TargetSamples = I.second.getCallTargetSum(); 813 // It's possible that the body sample count can be greater than the call 814 // target sum. E.g, if some call targets are external targets, they won't 815 // be considered valid call targets, but the body sample count which is 816 // from lbr ranges can actually include them. 817 if (TargetSamples > I.second.getSamples()) 818 I.second.addSamples(TargetSamples - I.second.getSamples()); 819 } 820 } 821 822 // Accumulate all body samples to set total samples. 823 void updateTotalSamples() { 824 setTotalSamples(0); 825 for (const auto &I : BodySamples) 826 addTotalSamples(I.second.getSamples()); 827 828 for (auto &I : CallsiteSamples) { 829 for (auto &CS : I.second) { 830 CS.second.updateTotalSamples(); 831 addTotalSamples(CS.second.getTotalSamples()); 832 } 833 } 834 } 835 836 // Set current context and all callee contexts to be synthetic. 837 void SetContextSynthetic() { 838 Context.setState(SyntheticContext); 839 for (auto &I : CallsiteSamples) { 840 for (auto &CS : I.second) { 841 CS.second.SetContextSynthetic(); 842 } 843 } 844 } 845 846 // Query the stale profile matching results and remap the location. 847 const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const { 848 // There is no remapping if the profile is not stale or the matching gives 849 // the same location. 850 if (!IRToProfileLocationMap) 851 return IRLoc; 852 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); 853 if (ProfileLoc != IRToProfileLocationMap->end()) 854 return ProfileLoc->second; 855 else 856 return IRLoc; 857 } 858 859 /// Return the number of samples collected at the given location. 860 /// Each location is specified by \p LineOffset and \p Discriminator. 861 /// If the location is not found in profile, return error. 862 ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, 863 uint32_t Discriminator) const { 864 const auto &ret = BodySamples.find( 865 mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); 866 if (ret == BodySamples.end()) 867 return std::error_code(); 868 return ret->second.getSamples(); 869 } 870 871 /// Returns the call target map collected at a given location. 872 /// Each location is specified by \p LineOffset and \p Discriminator. 873 /// If the location is not found in profile, return error. 874 ErrorOr<SampleRecord::CallTargetMap> 875 findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { 876 const auto &ret = BodySamples.find( 877 mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); 878 if (ret == BodySamples.end()) 879 return std::error_code(); 880 return ret->second.getCallTargets(); 881 } 882 883 /// Returns the call target map collected at a given location specified by \p 884 /// CallSite. If the location is not found in profile, return error. 885 ErrorOr<SampleRecord::CallTargetMap> 886 findCallTargetMapAt(const LineLocation &CallSite) const { 887 const auto &Ret = BodySamples.find(mapIRLocToProfileLoc(CallSite)); 888 if (Ret == BodySamples.end()) 889 return std::error_code(); 890 return Ret->second.getCallTargets(); 891 } 892 893 /// Return the function samples at the given callsite location. 894 FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { 895 return CallsiteSamples[mapIRLocToProfileLoc(Loc)]; 896 } 897 898 /// Returns the FunctionSamplesMap at the given \p Loc. 899 const FunctionSamplesMap * 900 findFunctionSamplesMapAt(const LineLocation &Loc) const { 901 auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); 902 if (iter == CallsiteSamples.end()) 903 return nullptr; 904 return &iter->second; 905 } 906 907 /// Returns a pointer to FunctionSamples at the given callsite location 908 /// \p Loc with callee \p CalleeName. If no callsite can be found, relax 909 /// the restriction to return the FunctionSamples at callsite location 910 /// \p Loc with the maximum total sample count. If \p Remapper is not 911 /// nullptr, use \p Remapper to find FunctionSamples with equivalent name 912 /// as \p CalleeName. 913 const FunctionSamples * 914 findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, 915 SampleProfileReaderItaniumRemapper *Remapper) const; 916 917 bool empty() const { return TotalSamples == 0; } 918 919 /// Return the total number of samples collected inside the function. 920 uint64_t getTotalSamples() const { return TotalSamples; } 921 922 /// For top-level functions, return the total number of branch samples that 923 /// have the function as the branch target (or 0 otherwise). This is the raw 924 /// data fetched from the profile. This should be equivalent to the sample of 925 /// the first instruction of the symbol. But as we directly get this info for 926 /// raw profile without referring to potentially inaccurate debug info, this 927 /// gives more accurate profile data and is preferred for standalone symbols. 928 uint64_t getHeadSamples() const { return TotalHeadSamples; } 929 930 /// Return an estimate of the sample count of the function entry basic block. 931 /// The function can be either a standalone symbol or an inlined function. 932 /// For Context-Sensitive profiles, this will prefer returning the head 933 /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from 934 /// the function body's samples or callsite samples. 935 uint64_t getHeadSamplesEstimate() const { 936 if (FunctionSamples::ProfileIsCS && getHeadSamples()) { 937 // For CS profile, if we already have more accurate head samples 938 // counted by branch sample from caller, use them as entry samples. 939 return getHeadSamples(); 940 } 941 uint64_t Count = 0; 942 // Use either BodySamples or CallsiteSamples which ever has the smaller 943 // lineno. 944 if (!BodySamples.empty() && 945 (CallsiteSamples.empty() || 946 BodySamples.begin()->first < CallsiteSamples.begin()->first)) 947 Count = BodySamples.begin()->second.getSamples(); 948 else if (!CallsiteSamples.empty()) { 949 // An indirect callsite may be promoted to several inlined direct calls. 950 // We need to get the sum of them. 951 for (const auto &N_FS : CallsiteSamples.begin()->second) 952 Count += N_FS.second.getHeadSamplesEstimate(); 953 } 954 // Return at least 1 if total sample is not 0. 955 return Count ? Count : TotalSamples > 0; 956 } 957 958 /// Return all the samples collected in the body of the function. 959 const BodySampleMap &getBodySamples() const { return BodySamples; } 960 961 /// Return all the callsite samples collected in the body of the function. 962 const CallsiteSampleMap &getCallsiteSamples() const { 963 return CallsiteSamples; 964 } 965 966 /// Return the maximum of sample counts in a function body. When SkipCallSite 967 /// is false, which is the default, the return count includes samples in the 968 /// inlined functions. When SkipCallSite is true, the return count only 969 /// considers the body samples. 970 uint64_t getMaxCountInside(bool SkipCallSite = false) const { 971 uint64_t MaxCount = 0; 972 for (const auto &L : getBodySamples()) 973 MaxCount = std::max(MaxCount, L.second.getSamples()); 974 if (SkipCallSite) 975 return MaxCount; 976 for (const auto &C : getCallsiteSamples()) 977 for (const FunctionSamplesMap::value_type &F : C.second) 978 MaxCount = std::max(MaxCount, F.second.getMaxCountInside()); 979 return MaxCount; 980 } 981 982 /// Merge the samples in \p Other into this one. 983 /// Optionally scale samples by \p Weight. 984 sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { 985 sampleprof_error Result = sampleprof_error::success; 986 if (!GUIDToFuncNameMap) 987 GUIDToFuncNameMap = Other.GUIDToFuncNameMap; 988 if (Context.getName().empty()) 989 Context = Other.getContext(); 990 if (FunctionHash == 0) { 991 // Set the function hash code for the target profile. 992 FunctionHash = Other.getFunctionHash(); 993 } else if (FunctionHash != Other.getFunctionHash()) { 994 // The two profiles coming with different valid hash codes indicates 995 // either: 996 // 1. They are same-named static functions from different compilation 997 // units (without using -unique-internal-linkage-names), or 998 // 2. They are really the same function but from different compilations. 999 // Let's bail out in either case for now, which means one profile is 1000 // dropped. 1001 return sampleprof_error::hash_mismatch; 1002 } 1003 1004 MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); 1005 MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); 1006 for (const auto &I : Other.getBodySamples()) { 1007 const LineLocation &Loc = I.first; 1008 const SampleRecord &Rec = I.second; 1009 MergeResult(Result, BodySamples[Loc].merge(Rec, Weight)); 1010 } 1011 for (const auto &I : Other.getCallsiteSamples()) { 1012 const LineLocation &Loc = I.first; 1013 FunctionSamplesMap &FSMap = functionSamplesAt(Loc); 1014 for (const auto &Rec : I.second) 1015 MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight)); 1016 } 1017 return Result; 1018 } 1019 1020 /// Recursively traverses all children, if the total sample count of the 1021 /// corresponding function is no less than \p Threshold, add its corresponding 1022 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID 1023 /// to \p S. 1024 void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, 1025 const StringMap<Function *> &SymbolMap, 1026 uint64_t Threshold) const { 1027 if (TotalSamples <= Threshold) 1028 return; 1029 auto isDeclaration = [](const Function *F) { 1030 return !F || F->isDeclaration(); 1031 }; 1032 if (isDeclaration(SymbolMap.lookup(getFuncName()))) { 1033 // Add to the import list only when it's defined out of module. 1034 S.insert(getGUID(getName())); 1035 } 1036 // Import hot CallTargets, which may not be available in IR because full 1037 // profile annotation cannot be done until backend compilation in ThinLTO. 1038 for (const auto &BS : BodySamples) 1039 for (const auto &TS : BS.second.getCallTargets()) 1040 if (TS.getValue() > Threshold) { 1041 const Function *Callee = SymbolMap.lookup(getFuncName(TS.getKey())); 1042 if (isDeclaration(Callee)) 1043 S.insert(getGUID(TS.getKey())); 1044 } 1045 for (const auto &CS : CallsiteSamples) 1046 for (const auto &NameFS : CS.second) 1047 NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold); 1048 } 1049 1050 /// Set the name of the function. 1051 void setName(StringRef FunctionName) { Context.setName(FunctionName); } 1052 1053 /// Return the function name. 1054 StringRef getName() const { return Context.getName(); } 1055 1056 /// Return the original function name. 1057 StringRef getFuncName() const { return getFuncName(getName()); } 1058 1059 void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } 1060 1061 uint64_t getFunctionHash() const { return FunctionHash; } 1062 1063 void setIRToProfileLocationMap(const LocToLocMap *LTLM) { 1064 assert(IRToProfileLocationMap == nullptr && "this should be set only once"); 1065 IRToProfileLocationMap = LTLM; 1066 } 1067 1068 /// Return the canonical name for a function, taking into account 1069 /// suffix elision policy attributes. 1070 static StringRef getCanonicalFnName(const Function &F) { 1071 auto AttrName = "sample-profile-suffix-elision-policy"; 1072 auto Attr = F.getFnAttribute(AttrName).getValueAsString(); 1073 return getCanonicalFnName(F.getName(), Attr); 1074 } 1075 1076 /// Name suffixes which canonicalization should handle to avoid 1077 /// profile mismatch. 1078 static constexpr const char *LLVMSuffix = ".llvm."; 1079 static constexpr const char *PartSuffix = ".part."; 1080 static constexpr const char *UniqSuffix = ".__uniq."; 1081 1082 static StringRef getCanonicalFnName(StringRef FnName, 1083 StringRef Attr = "selected") { 1084 // Note the sequence of the suffixes in the knownSuffixes array matters. 1085 // If suffix "A" is appended after the suffix "B", "A" should be in front 1086 // of "B" in knownSuffixes. 1087 const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; 1088 if (Attr == "" || Attr == "all") { 1089 return FnName.split('.').first; 1090 } else if (Attr == "selected") { 1091 StringRef Cand(FnName); 1092 for (const auto &Suf : knownSuffixes) { 1093 StringRef Suffix(Suf); 1094 // If the profile contains ".__uniq." suffix, don't strip the 1095 // suffix for names in the IR. 1096 if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) 1097 continue; 1098 auto It = Cand.rfind(Suffix); 1099 if (It == StringRef::npos) 1100 continue; 1101 auto Dit = Cand.rfind('.'); 1102 if (Dit == It + Suffix.size() - 1) 1103 Cand = Cand.substr(0, It); 1104 } 1105 return Cand; 1106 } else if (Attr == "none") { 1107 return FnName; 1108 } else { 1109 assert(false && "internal error: unknown suffix elision policy"); 1110 } 1111 return FnName; 1112 } 1113 1114 /// Translate \p Name into its original name. 1115 /// When profile doesn't use MD5, \p Name needs no translation. 1116 /// When profile uses MD5, \p Name in current FunctionSamples 1117 /// is actually GUID of the original function name. getFuncName will 1118 /// translate \p Name in current FunctionSamples into its original name 1119 /// by looking up in the function map GUIDToFuncNameMap. 1120 /// If the original name doesn't exist in the map, return empty StringRef. 1121 StringRef getFuncName(StringRef Name) const { 1122 if (!UseMD5) 1123 return Name; 1124 1125 assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); 1126 return GUIDToFuncNameMap->lookup(std::stoull(Name.data())); 1127 } 1128 1129 /// Returns the line offset to the start line of the subprogram. 1130 /// We assume that a single function will not exceed 65535 LOC. 1131 static unsigned getOffset(const DILocation *DIL); 1132 1133 /// Returns a unique call site identifier for a given debug location of a call 1134 /// instruction. This is wrapper of two scenarios, the probe-based profile and 1135 /// regular profile, to hide implementation details from the sample loader and 1136 /// the context tracker. 1137 static LineLocation getCallSiteIdentifier(const DILocation *DIL, 1138 bool ProfileIsFS = false); 1139 1140 /// Returns a unique hash code for a combination of a callsite location and 1141 /// the callee function name. 1142 static uint64_t getCallSiteHash(StringRef CalleeName, 1143 const LineLocation &Callsite); 1144 1145 /// Get the FunctionSamples of the inline instance where DIL originates 1146 /// from. 1147 /// 1148 /// The FunctionSamples of the instruction (Machine or IR) associated to 1149 /// \p DIL is the inlined instance in which that instruction is coming from. 1150 /// We traverse the inline stack of that instruction, and match it with the 1151 /// tree nodes in the profile. 1152 /// 1153 /// \returns the FunctionSamples pointer to the inlined instance. 1154 /// If \p Remapper is not nullptr, it will be used to find matching 1155 /// FunctionSamples with not exactly the same but equivalent name. 1156 const FunctionSamples *findFunctionSamples( 1157 const DILocation *DIL, 1158 SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; 1159 1160 static bool ProfileIsProbeBased; 1161 1162 static bool ProfileIsCS; 1163 1164 static bool ProfileIsPreInlined; 1165 1166 SampleContext &getContext() const { return Context; } 1167 1168 void setContext(const SampleContext &FContext) { Context = FContext; } 1169 1170 /// Whether the profile uses MD5 to represent string. 1171 static bool UseMD5; 1172 1173 /// Whether the profile contains any ".__uniq." suffix in a name. 1174 static bool HasUniqSuffix; 1175 1176 /// If this profile uses flow sensitive discriminators. 1177 static bool ProfileIsFS; 1178 1179 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for 1180 /// all the function symbols defined or declared in current module. 1181 DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr; 1182 1183 // Assume the input \p Name is a name coming from FunctionSamples itself. 1184 // If UseMD5 is true, the name is already a GUID and we 1185 // don't want to return the GUID of GUID. 1186 static uint64_t getGUID(StringRef Name) { 1187 return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name); 1188 } 1189 1190 // Find all the names in the current FunctionSamples including names in 1191 // all the inline instances and names of call targets. 1192 void findAllNames(DenseSet<StringRef> &NameSet) const; 1193 1194 bool operator==(const FunctionSamples &Other) const { 1195 return (GUIDToFuncNameMap == Other.GUIDToFuncNameMap || 1196 (GUIDToFuncNameMap && Other.GUIDToFuncNameMap && 1197 *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap)) && 1198 FunctionHash == Other.FunctionHash && Context == Other.Context && 1199 TotalSamples == Other.TotalSamples && 1200 TotalHeadSamples == Other.TotalHeadSamples && 1201 BodySamples == Other.BodySamples && 1202 CallsiteSamples == Other.CallsiteSamples; 1203 } 1204 1205 bool operator!=(const FunctionSamples &Other) const { 1206 return !(*this == Other); 1207 } 1208 1209 private: 1210 /// CFG hash value for the function. 1211 uint64_t FunctionHash = 0; 1212 1213 /// Calling context for function profile 1214 mutable SampleContext Context; 1215 1216 /// Total number of samples collected inside this function. 1217 /// 1218 /// Samples are cumulative, they include all the samples collected 1219 /// inside this function and all its inlined callees. 1220 uint64_t TotalSamples = 0; 1221 1222 /// Total number of samples collected at the head of the function. 1223 /// This is an approximation of the number of calls made to this function 1224 /// at runtime. 1225 uint64_t TotalHeadSamples = 0; 1226 1227 /// Map instruction locations to collected samples. 1228 /// 1229 /// Each entry in this map contains the number of samples 1230 /// collected at the corresponding line offset. All line locations 1231 /// are an offset from the start of the function. 1232 BodySampleMap BodySamples; 1233 1234 /// Map call sites to collected samples for the called function. 1235 /// 1236 /// Each entry in this map corresponds to all the samples 1237 /// collected for the inlined function call at the given 1238 /// location. For example, given: 1239 /// 1240 /// void foo() { 1241 /// 1 bar(); 1242 /// ... 1243 /// 8 baz(); 1244 /// } 1245 /// 1246 /// If the bar() and baz() calls were inlined inside foo(), this 1247 /// map will contain two entries. One for all the samples collected 1248 /// in the call to bar() at line offset 1, the other for all the samples 1249 /// collected in the call to baz() at line offset 8. 1250 CallsiteSampleMap CallsiteSamples; 1251 1252 /// IR to profile location map generated by stale profile matching. 1253 /// 1254 /// Each entry is a mapping from the location on current build to the matched 1255 /// location in the "stale" profile. For example: 1256 /// Profiled source code: 1257 /// void foo() { 1258 /// 1 bar(); 1259 /// } 1260 /// 1261 /// Current source code: 1262 /// void foo() { 1263 /// 1 // Code change 1264 /// 2 bar(); 1265 /// } 1266 /// Supposing the stale profile matching algorithm generated the mapping [2 -> 1267 /// 1], the profile query using the location of bar on the IR which is 2 will 1268 /// be remapped to 1 and find the location of bar in the profile. 1269 const LocToLocMap *IRToProfileLocationMap = nullptr; 1270 }; 1271 1272 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); 1273 1274 using SampleProfileMap = 1275 std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>; 1276 1277 using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>; 1278 1279 void sortFuncProfiles(const SampleProfileMap &ProfileMap, 1280 std::vector<NameFunctionSamples> &SortedProfiles); 1281 1282 /// Sort a LocationT->SampleT map by LocationT. 1283 /// 1284 /// It produces a sorted list of <LocationT, SampleT> records by ascending 1285 /// order of LocationT. 1286 template <class LocationT, class SampleT> class SampleSorter { 1287 public: 1288 using SamplesWithLoc = std::pair<const LocationT, SampleT>; 1289 using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>; 1290 1291 SampleSorter(const std::map<LocationT, SampleT> &Samples) { 1292 for (const auto &I : Samples) 1293 V.push_back(&I); 1294 llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) { 1295 return A->first < B->first; 1296 }); 1297 } 1298 1299 const SamplesWithLocList &get() const { return V; } 1300 1301 private: 1302 SamplesWithLocList V; 1303 }; 1304 1305 /// SampleContextTrimmer impelements helper functions to trim, merge cold 1306 /// context profiles. It also supports context profile canonicalization to make 1307 /// sure ProfileMap's key is consistent with FunctionSample's name/context. 1308 class SampleContextTrimmer { 1309 public: 1310 SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; 1311 // Trim and merge cold context profile when requested. TrimBaseProfileOnly 1312 // should only be effective when TrimColdContext is true. On top of 1313 // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all 1314 // cold profiles or only cold base profiles. Trimming base profiles only is 1315 // mainly to honor the preinliner decsion. Note that when MergeColdContext is 1316 // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will 1317 // be ignored. 1318 void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, 1319 bool TrimColdContext, 1320 bool MergeColdContext, 1321 uint32_t ColdContextFrameLength, 1322 bool TrimBaseProfileOnly); 1323 // Canonicalize context profile name and attributes. 1324 void canonicalizeContextProfiles(); 1325 1326 private: 1327 SampleProfileMap &ProfileMap; 1328 }; 1329 1330 /// Helper class for profile conversion. 1331 /// 1332 /// It supports full context-sensitive profile to nested profile conversion, 1333 /// nested profile to flatten profile conversion, etc. 1334 class ProfileConverter { 1335 public: 1336 ProfileConverter(SampleProfileMap &Profiles); 1337 // Convert a full context-sensitive flat sample profile into a nested sample 1338 // profile. 1339 void convertCSProfiles(); 1340 struct FrameNode { 1341 FrameNode(StringRef FName = StringRef(), 1342 FunctionSamples *FSamples = nullptr, 1343 LineLocation CallLoc = {0, 0}) 1344 : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; 1345 1346 // Map line+discriminator location to child frame 1347 std::map<uint64_t, FrameNode> AllChildFrames; 1348 // Function name for current frame 1349 StringRef FuncName; 1350 // Function Samples for current frame 1351 FunctionSamples *FuncSamples; 1352 // Callsite location in parent context 1353 LineLocation CallSiteLoc; 1354 1355 FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, 1356 StringRef CalleeName); 1357 }; 1358 1359 static void flattenProfile(SampleProfileMap &ProfileMap, 1360 bool ProfileIsCS = false) { 1361 SampleProfileMap TmpProfiles; 1362 flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS); 1363 ProfileMap = std::move(TmpProfiles); 1364 } 1365 1366 static void flattenProfile(const SampleProfileMap &InputProfiles, 1367 SampleProfileMap &OutputProfiles, 1368 bool ProfileIsCS = false) { 1369 if (ProfileIsCS) { 1370 for (const auto &I : InputProfiles) 1371 OutputProfiles[I.second.getName()].merge(I.second); 1372 // Retain the profile name and clear the full context for each function 1373 // profile. 1374 for (auto &I : OutputProfiles) 1375 I.second.setContext(SampleContext(I.first)); 1376 } else { 1377 for (const auto &I : InputProfiles) 1378 flattenNestedProfile(OutputProfiles, I.second); 1379 } 1380 } 1381 1382 private: 1383 static void flattenNestedProfile(SampleProfileMap &OutputProfiles, 1384 const FunctionSamples &FS) { 1385 // To retain the context, checksum, attributes of the original profile, make 1386 // a copy of it if no profile is found. 1387 SampleContext &Context = FS.getContext(); 1388 auto Ret = OutputProfiles.try_emplace(Context, FS); 1389 FunctionSamples &Profile = Ret.first->second; 1390 if (Ret.second) { 1391 // Clear nested inlinees' samples for the flattened copy. These inlinees 1392 // will have their own top-level entries after flattening. 1393 Profile.removeAllCallsiteSamples(); 1394 // We recompute TotalSamples later, so here set to zero. 1395 Profile.setTotalSamples(0); 1396 } else { 1397 for (const auto &[LineLocation, SampleRecord] : FS.getBodySamples()) { 1398 Profile.addSampleRecord(LineLocation, SampleRecord); 1399 } 1400 } 1401 1402 assert(Profile.getCallsiteSamples().empty() && 1403 "There should be no inlinees' profiles after flattening."); 1404 1405 // TotalSamples might not be equal to the sum of all samples from 1406 // BodySamples and CallsiteSamples. So here we use "TotalSamples = 1407 // Original_TotalSamples - All_of_Callsite_TotalSamples + 1408 // All_of_Callsite_HeadSamples" to compute the new TotalSamples. 1409 uint64_t TotalSamples = FS.getTotalSamples(); 1410 1411 for (const auto &I : FS.getCallsiteSamples()) { 1412 for (const auto &Callee : I.second) { 1413 const auto &CalleeProfile = Callee.second; 1414 // Add body sample. 1415 Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator, 1416 CalleeProfile.getHeadSamplesEstimate()); 1417 // Add callsite sample. 1418 Profile.addCalledTargetSamples( 1419 I.first.LineOffset, I.first.Discriminator, CalleeProfile.getName(), 1420 CalleeProfile.getHeadSamplesEstimate()); 1421 // Update total samples. 1422 TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples() 1423 ? TotalSamples - CalleeProfile.getTotalSamples() 1424 : 0; 1425 TotalSamples += CalleeProfile.getHeadSamplesEstimate(); 1426 // Recursively convert callee profile. 1427 flattenNestedProfile(OutputProfiles, CalleeProfile); 1428 } 1429 } 1430 Profile.addTotalSamples(TotalSamples); 1431 1432 Profile.setHeadSamples(Profile.getHeadSamplesEstimate()); 1433 } 1434 1435 // Nest all children profiles into the profile of Node. 1436 void convertCSProfiles(FrameNode &Node); 1437 FrameNode *getOrCreateContextPath(const SampleContext &Context); 1438 1439 SampleProfileMap &ProfileMap; 1440 FrameNode RootFrame; 1441 }; 1442 1443 /// ProfileSymbolList records the list of function symbols shown up 1444 /// in the binary used to generate the profile. It is useful to 1445 /// to discriminate a function being so cold as not to shown up 1446 /// in the profile and a function newly added. 1447 class ProfileSymbolList { 1448 public: 1449 /// copy indicates whether we need to copy the underlying memory 1450 /// for the input Name. 1451 void add(StringRef Name, bool copy = false) { 1452 if (!copy) { 1453 Syms.insert(Name); 1454 return; 1455 } 1456 Syms.insert(Name.copy(Allocator)); 1457 } 1458 1459 bool contains(StringRef Name) { return Syms.count(Name); } 1460 1461 void merge(const ProfileSymbolList &List) { 1462 for (auto Sym : List.Syms) 1463 add(Sym, true); 1464 } 1465 1466 unsigned size() { return Syms.size(); } 1467 1468 void setToCompress(bool TC) { ToCompress = TC; } 1469 bool toCompress() { return ToCompress; } 1470 1471 std::error_code read(const uint8_t *Data, uint64_t ListSize); 1472 std::error_code write(raw_ostream &OS); 1473 void dump(raw_ostream &OS = dbgs()) const; 1474 1475 private: 1476 // Determine whether or not to compress the symbol list when 1477 // writing it into profile. The variable is unused when the symbol 1478 // list is read from an existing profile. 1479 bool ToCompress = false; 1480 DenseSet<StringRef> Syms; 1481 BumpPtrAllocator Allocator; 1482 }; 1483 1484 } // end namespace sampleprof 1485 1486 using namespace sampleprof; 1487 // Provide DenseMapInfo for SampleContext. 1488 template <> struct DenseMapInfo<SampleContext> { 1489 static inline SampleContext getEmptyKey() { return SampleContext(); } 1490 1491 static inline SampleContext getTombstoneKey() { return SampleContext("@"); } 1492 1493 static unsigned getHashValue(const SampleContext &Val) { 1494 return Val.getHashCode(); 1495 } 1496 1497 static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { 1498 return LHS == RHS; 1499 } 1500 }; 1501 1502 // Prepend "__uniq" before the hash for tools like profilers to understand 1503 // that this symbol is of internal linkage type. The "__uniq" is the 1504 // pre-determined prefix that is used to tell tools that this symbol was 1505 // created with -funique-internal-linakge-symbols and the tools can strip or 1506 // keep the prefix as needed. 1507 inline std::string getUniqueInternalLinkagePostfix(const StringRef &FName) { 1508 llvm::MD5 Md5; 1509 Md5.update(FName); 1510 llvm::MD5::MD5Result R; 1511 Md5.final(R); 1512 SmallString<32> Str; 1513 llvm::MD5::stringifyResult(R, Str); 1514 // Convert MD5hash to Decimal. Demangler suffixes can either contain 1515 // numbers or characters but not both. 1516 llvm::APInt IntHash(128, Str.str(), 16); 1517 return toString(IntHash, /* Radix = */ 10, /* Signed = */ false) 1518 .insert(0, FunctionSamples::UniqSuffix); 1519 } 1520 1521 } // end namespace llvm 1522 1523 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H 1524