1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains common definitions used in the reading and writing of 10 // sample profile data. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H 15 #define LLVM_PROFILEDATA_SAMPLEPROF_H 16 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringMap.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/IR/Function.h" 23 #include "llvm/IR/GlobalValue.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/ErrorOr.h" 27 #include "llvm/Support/MathExtras.h" 28 #include <algorithm> 29 #include <cstdint> 30 #include <list> 31 #include <map> 32 #include <set> 33 #include <sstream> 34 #include <string> 35 #include <system_error> 36 #include <unordered_map> 37 #include <utility> 38 39 namespace llvm { 40 41 class DILocation; 42 class raw_ostream; 43 44 const std::error_category &sampleprof_category(); 45 46 enum class sampleprof_error { 47 success = 0, 48 bad_magic, 49 unsupported_version, 50 too_large, 51 truncated, 52 malformed, 53 unrecognized_format, 54 unsupported_writing_format, 55 truncated_name_table, 56 not_implemented, 57 counter_overflow, 58 ostream_seek_unsupported, 59 uncompress_failed, 60 zlib_unavailable, 61 hash_mismatch 62 }; 63 64 inline std::error_code make_error_code(sampleprof_error E) { 65 return std::error_code(static_cast<int>(E), sampleprof_category()); 66 } 67 68 inline sampleprof_error MergeResult(sampleprof_error &Accumulator, 69 sampleprof_error 
Result) { 70 // Prefer first error encountered as later errors may be secondary effects of 71 // the initial problem. 72 if (Accumulator == sampleprof_error::success && 73 Result != sampleprof_error::success) 74 Accumulator = Result; 75 return Accumulator; 76 } 77 78 } // end namespace llvm 79 80 namespace std { 81 82 template <> 83 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {}; 84 85 } // end namespace std 86 87 namespace llvm { 88 namespace sampleprof { 89 90 enum SampleProfileFormat { 91 SPF_None = 0, 92 SPF_Text = 0x1, 93 SPF_Compact_Binary = 0x2, 94 SPF_GCC = 0x3, 95 SPF_Ext_Binary = 0x4, 96 SPF_Binary = 0xff 97 }; 98 99 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { 100 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | 101 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | 102 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) | 103 uint64_t('2') << (64 - 56) | uint64_t(Format); 104 } 105 106 /// Get the proper representation of a string according to whether the 107 /// current Format uses MD5 to represent the string. 108 static inline StringRef getRepInFormat(StringRef Name, bool UseMD5, 109 std::string &GUIDBuf) { 110 if (Name.empty() || !UseMD5) 111 return Name; 112 GUIDBuf = std::to_string(Function::getGUID(Name)); 113 return GUIDBuf; 114 } 115 116 static inline uint64_t SPVersion() { return 103; } 117 118 // Section Type used by SampleProfileExtBinaryBaseReader and 119 // SampleProfileExtBinaryBaseWriter. Never change the existing 120 // value of enum. Only append new ones. 121 enum SecType { 122 SecInValid = 0, 123 SecProfSummary = 1, 124 SecNameTable = 2, 125 SecProfileSymbolList = 3, 126 SecFuncOffsetTable = 4, 127 SecFuncMetadata = 5, 128 SecCSNameTable = 6, 129 // marker for the first type of profile. 
130 SecFuncProfileFirst = 32, 131 SecLBRProfile = SecFuncProfileFirst 132 }; 133 134 static inline std::string getSecName(SecType Type) { 135 switch ((int)Type) { // Avoid -Wcovered-switch-default 136 case SecInValid: 137 return "InvalidSection"; 138 case SecProfSummary: 139 return "ProfileSummarySection"; 140 case SecNameTable: 141 return "NameTableSection"; 142 case SecProfileSymbolList: 143 return "ProfileSymbolListSection"; 144 case SecFuncOffsetTable: 145 return "FuncOffsetTableSection"; 146 case SecFuncMetadata: 147 return "FunctionMetadata"; 148 case SecCSNameTable: 149 return "CSNameTableSection"; 150 case SecLBRProfile: 151 return "LBRProfileSection"; 152 default: 153 return "UnknownSection"; 154 } 155 } 156 157 // Entry type of section header table used by SampleProfileExtBinaryBaseReader 158 // and SampleProfileExtBinaryBaseWriter. 159 struct SecHdrTableEntry { 160 SecType Type; 161 uint64_t Flags; 162 uint64_t Offset; 163 uint64_t Size; 164 // The index indicating the location of the current entry in 165 // SectionHdrLayout table. 166 uint32_t LayoutIndex; 167 }; 168 169 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags, 170 // common flags will be saved in the lower 32bits and section specific flags 171 // will be saved in the higher 32 bits. 172 enum class SecCommonFlags : uint32_t { 173 SecFlagInValid = 0, 174 SecFlagCompress = (1 << 0), 175 // Indicate the section contains only profile without context. 176 SecFlagFlat = (1 << 1) 177 }; 178 179 // Section specific flags are defined here. 180 // !!!Note: Everytime a new enum class is created here, please add 181 // a new check in verifySecFlag. 182 enum class SecNameTableFlags : uint32_t { 183 SecFlagInValid = 0, 184 SecFlagMD5Name = (1 << 0), 185 // Store MD5 in fixed length instead of ULEB128 so NameTable can be 186 // accessed like an array. 187 SecFlagFixedLengthMD5 = (1 << 1), 188 // Profile contains ".__uniq." suffix name. 
Compiler shouldn't strip 189 // the suffix when doing profile matching when seeing the flag. 190 SecFlagUniqSuffix = (1 << 2) 191 }; 192 enum class SecProfSummaryFlags : uint32_t { 193 SecFlagInValid = 0, 194 /// SecFlagPartial means the profile is for common/shared code. 195 /// The common profile is usually merged from profiles collected 196 /// from running other targets. 197 SecFlagPartial = (1 << 0), 198 /// SecFlagContext means this is context-sensitive flat profile for 199 /// CSSPGO 200 SecFlagFullContext = (1 << 1), 201 /// SecFlagFSDiscriminator means this profile uses flow-sensitive 202 /// discriminators. 203 SecFlagFSDiscriminator = (1 << 2), 204 /// SecFlagIsPreInlined means this profile contains ShouldBeInlined 205 /// contexts thus this is CS preinliner computed. 206 SecFlagIsPreInlined = (1 << 4), 207 }; 208 209 enum class SecFuncMetadataFlags : uint32_t { 210 SecFlagInvalid = 0, 211 SecFlagIsProbeBased = (1 << 0), 212 SecFlagHasAttribute = (1 << 1), 213 }; 214 215 enum class SecFuncOffsetFlags : uint32_t { 216 SecFlagInvalid = 0, 217 // Store function offsets in an order of contexts. The order ensures that 218 // callee contexts of a given context laid out next to it. 219 SecFlagOrdered = (1 << 0), 220 }; 221 222 // Verify section specific flag is used for the correct section. 223 template <class SecFlagType> 224 static inline void verifySecFlag(SecType Type, SecFlagType Flag) { 225 // No verification is needed for common flags. 226 if (std::is_same<SecCommonFlags, SecFlagType>()) 227 return; 228 229 // Verification starts here for section specific flag. 
230 bool IsFlagLegal = false; 231 switch (Type) { 232 case SecNameTable: 233 IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>(); 234 break; 235 case SecProfSummary: 236 IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>(); 237 break; 238 case SecFuncMetadata: 239 IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); 240 break; 241 default: 242 case SecFuncOffsetTable: 243 IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>(); 244 break; 245 } 246 if (!IsFlagLegal) 247 llvm_unreachable("Misuse of a flag in an incompatible section"); 248 } 249 250 template <class SecFlagType> 251 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { 252 verifySecFlag(Entry.Type, Flag); 253 auto FVal = static_cast<uint64_t>(Flag); 254 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 255 Entry.Flags |= IsCommon ? FVal : (FVal << 32); 256 } 257 258 template <class SecFlagType> 259 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { 260 verifySecFlag(Entry.Type, Flag); 261 auto FVal = static_cast<uint64_t>(Flag); 262 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 263 Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32)); 264 } 265 266 template <class SecFlagType> 267 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) { 268 verifySecFlag(Entry.Type, Flag); 269 auto FVal = static_cast<uint64_t>(Flag); 270 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 271 return Entry.Flags & (IsCommon ? FVal : (FVal << 32)); 272 } 273 274 /// Represents the relative location of an instruction. 275 /// 276 /// Instruction locations are specified by the line offset from the 277 /// beginning of the function (marked by the line where the function 278 /// header is) and the discriminator value within that line. 
///
/// The discriminator value is useful to distinguish instructions
/// that are on the same line but belong to different basic blocks
/// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
struct LineLocation {
  LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}

  void print(raw_ostream &OS) const;
  void dump() const;

  /// Lexicographic order: line offset first, discriminator as tie-breaker.
  bool operator<(const LineLocation &O) const {
    if (LineOffset != O.LineOffset)
      return LineOffset < O.LineOffset;
    return Discriminator < O.Discriminator;
  }

  /// Two locations are equal when both fields match.
  bool operator==(const LineLocation &O) const {
    if (LineOffset != O.LineOffset)
      return false;
    return Discriminator == O.Discriminator;
  }

  bool operator!=(const LineLocation &O) const { return !(*this == O); }

  uint32_t LineOffset;
  uint32_t Discriminator;
};

/// Hash functor for LineLocation: the line offset is placed in the upper
/// 32 bits and the discriminator in the lower 32 bits of a single 64-bit key,
/// which is then passed to std::hash.
struct LineLocationHash {
  uint64_t operator()(const LineLocation &Loc) const {
    const uint64_t UpperHalf = ((uint64_t)Loc.LineOffset) << 32;
    const uint64_t Packed = UpperHalf | Loc.Discriminator;
    return std::hash<std::uint64_t>{}(Packed);
  }
};

raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);

/// Representation of a single sample record.
///
/// A sample record is represented by a positive integer value, which
/// indicates how frequently was the associated line location executed.
///
/// Additionally, if the associated location contains a function call,
/// the record will hold a list of all the possible called targets. For
/// direct calls, this will be the exact function being invoked. For
/// indirect calls (function pointers, virtual table dispatch), this
/// will be a list of one or more functions.
325 class SampleRecord { 326 public: 327 using CallTarget = std::pair<StringRef, uint64_t>; 328 struct CallTargetComparator { 329 bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { 330 if (LHS.second != RHS.second) 331 return LHS.second > RHS.second; 332 333 return LHS.first < RHS.first; 334 } 335 }; 336 337 using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>; 338 using CallTargetMap = StringMap<uint64_t>; 339 SampleRecord() = default; 340 341 /// Increment the number of samples for this record by \p S. 342 /// Optionally scale sample count \p S by \p Weight. 343 /// 344 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 345 /// around unsigned integers. 346 sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { 347 bool Overflowed; 348 NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); 349 return Overflowed ? sampleprof_error::counter_overflow 350 : sampleprof_error::success; 351 } 352 353 /// Decrease the number of samples for this record by \p S. Return the amout 354 /// of samples actually decreased. 355 uint64_t removeSamples(uint64_t S) { 356 if (S > NumSamples) 357 S = NumSamples; 358 NumSamples -= S; 359 return S; 360 } 361 362 /// Add called function \p F with samples \p S. 363 /// Optionally scale sample count \p S by \p Weight. 364 /// 365 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 366 /// around unsigned integers. 367 sampleprof_error addCalledTarget(StringRef F, uint64_t S, 368 uint64_t Weight = 1) { 369 uint64_t &TargetSamples = CallTargets[F]; 370 bool Overflowed; 371 TargetSamples = 372 SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); 373 return Overflowed ? sampleprof_error::counter_overflow 374 : sampleprof_error::success; 375 } 376 377 /// Remove called function from the call target map. Return the target sample 378 /// count of the called function. 
379 uint64_t removeCalledTarget(StringRef F) { 380 uint64_t Count = 0; 381 auto I = CallTargets.find(F); 382 if (I != CallTargets.end()) { 383 Count = I->second; 384 CallTargets.erase(I); 385 } 386 return Count; 387 } 388 389 /// Return true if this sample record contains function calls. 390 bool hasCalls() const { return !CallTargets.empty(); } 391 392 uint64_t getSamples() const { return NumSamples; } 393 const CallTargetMap &getCallTargets() const { return CallTargets; } 394 const SortedCallTargetSet getSortedCallTargets() const { 395 return SortCallTargets(CallTargets); 396 } 397 398 uint64_t getCallTargetSum() const { 399 uint64_t Sum = 0; 400 for (const auto &I : CallTargets) 401 Sum += I.second; 402 return Sum; 403 } 404 405 /// Sort call targets in descending order of call frequency. 406 static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { 407 SortedCallTargetSet SortedTargets; 408 for (const auto &[Target, Frequency] : Targets) { 409 SortedTargets.emplace(Target, Frequency); 410 } 411 return SortedTargets; 412 } 413 414 /// Prorate call targets by a distribution factor. 415 static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, 416 float DistributionFactor) { 417 CallTargetMap AdjustedTargets; 418 for (const auto &[Target, Frequency] : Targets) { 419 AdjustedTargets[Target] = Frequency * DistributionFactor; 420 } 421 return AdjustedTargets; 422 } 423 424 /// Merge the samples in \p Other into this record. 425 /// Optionally scale sample counts by \p Weight. 
426 sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1); 427 void print(raw_ostream &OS, unsigned Indent) const; 428 void dump() const; 429 430 private: 431 uint64_t NumSamples = 0; 432 CallTargetMap CallTargets; 433 }; 434 435 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); 436 437 // State of context associated with FunctionSamples 438 enum ContextStateMask { 439 UnknownContext = 0x0, // Profile without context 440 RawContext = 0x1, // Full context profile from input profile 441 SyntheticContext = 0x2, // Synthetic context created for context promotion 442 InlinedContext = 0x4, // Profile for context that is inlined into caller 443 MergedContext = 0x8 // Profile for context merged into base profile 444 }; 445 446 // Attribute of context associated with FunctionSamples 447 enum ContextAttributeMask { 448 ContextNone = 0x0, 449 ContextWasInlined = 0x1, // Leaf of context was inlined in previous build 450 ContextShouldBeInlined = 0x2, // Leaf of context should be inlined 451 ContextDuplicatedIntoBase = 452 0x4, // Leaf of context is duplicated into the base profile 453 }; 454 455 // Represents a context frame with function name and line location 456 struct SampleContextFrame { 457 StringRef FuncName; 458 LineLocation Location; 459 460 SampleContextFrame() : Location(0, 0) {} 461 462 SampleContextFrame(StringRef FuncName, LineLocation Location) 463 : FuncName(FuncName), Location(Location) {} 464 465 bool operator==(const SampleContextFrame &That) const { 466 return Location == That.Location && FuncName == That.FuncName; 467 } 468 469 bool operator!=(const SampleContextFrame &That) const { 470 return !(*this == That); 471 } 472 473 std::string toString(bool OutputLineLocation) const { 474 std::ostringstream OContextStr; 475 OContextStr << FuncName.str(); 476 if (OutputLineLocation) { 477 OContextStr << ":" << Location.LineOffset; 478 if (Location.Discriminator) 479 OContextStr << "." 
<< Location.Discriminator; 480 } 481 return OContextStr.str(); 482 } 483 }; 484 485 static inline hash_code hash_value(const SampleContextFrame &arg) { 486 return hash_combine(arg.FuncName, arg.Location.LineOffset, 487 arg.Location.Discriminator); 488 } 489 490 using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>; 491 using SampleContextFrames = ArrayRef<SampleContextFrame>; 492 493 struct SampleContextFrameHash { 494 uint64_t operator()(const SampleContextFrameVector &S) const { 495 return hash_combine_range(S.begin(), S.end()); 496 } 497 }; 498 499 // Sample context for FunctionSamples. It consists of the calling context, 500 // the function name and context state. Internally sample context is represented 501 // using ArrayRef, which is also the input for constructing a `SampleContext`. 502 // It can accept and represent both full context string as well as context-less 503 // function name. 504 // For a CS profile, a full context vector can look like: 505 // `main:3 _Z5funcAi:1 _Z8funcLeafi` 506 // For a base CS profile without calling context, the context vector should only 507 // contain the leaf frame name. 508 // For a non-CS profile, the context vector should be empty. 509 class SampleContext { 510 public: 511 SampleContext() : State(UnknownContext), Attributes(ContextNone) {} 512 513 SampleContext(StringRef Name) 514 : Name(Name), State(UnknownContext), Attributes(ContextNone) {} 515 516 SampleContext(SampleContextFrames Context, 517 ContextStateMask CState = RawContext) 518 : Attributes(ContextNone) { 519 assert(!Context.empty() && "Context is empty"); 520 setContext(Context, CState); 521 } 522 523 // Give a context string, decode and populate internal states like 524 // Function name, Calling context and context state. 
Example of input 525 // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` 526 SampleContext(StringRef ContextStr, 527 std::list<SampleContextFrameVector> &CSNameTable, 528 ContextStateMask CState = RawContext) 529 : Attributes(ContextNone) { 530 assert(!ContextStr.empty()); 531 // Note that `[]` wrapped input indicates a full context string, otherwise 532 // it's treated as context-less function name only. 533 bool HasContext = ContextStr.startswith("["); 534 if (!HasContext) { 535 State = UnknownContext; 536 Name = ContextStr; 537 } else { 538 CSNameTable.emplace_back(); 539 SampleContextFrameVector &Context = CSNameTable.back(); 540 createCtxVectorFromStr(ContextStr, Context); 541 setContext(Context, CState); 542 } 543 } 544 545 /// Create a context vector from a given context string and save it in 546 /// `Context`. 547 static void createCtxVectorFromStr(StringRef ContextStr, 548 SampleContextFrameVector &Context) { 549 // Remove encapsulating '[' and ']' if any 550 ContextStr = ContextStr.substr(1, ContextStr.size() - 2); 551 StringRef ContextRemain = ContextStr; 552 StringRef ChildContext; 553 StringRef CalleeName; 554 while (!ContextRemain.empty()) { 555 auto ContextSplit = ContextRemain.split(" @ "); 556 ChildContext = ContextSplit.first; 557 ContextRemain = ContextSplit.second; 558 LineLocation CallSiteLoc(0, 0); 559 decodeContextString(ChildContext, CalleeName, CallSiteLoc); 560 Context.emplace_back(CalleeName, CallSiteLoc); 561 } 562 } 563 564 // Decode context string for a frame to get function name and location. 565 // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. 566 static void decodeContextString(StringRef ContextStr, StringRef &FName, 567 LineLocation &LineLoc) { 568 // Get function name 569 auto EntrySplit = ContextStr.split(':'); 570 FName = EntrySplit.first; 571 572 LineLoc = {0, 0}; 573 if (!EntrySplit.second.empty()) { 574 // Get line offset, use signed int for getAsInteger so string will 575 // be parsed as signed. 
576 int LineOffset = 0; 577 auto LocSplit = EntrySplit.second.split('.'); 578 LocSplit.first.getAsInteger(10, LineOffset); 579 LineLoc.LineOffset = LineOffset; 580 581 // Get discriminator 582 if (!LocSplit.second.empty()) 583 LocSplit.second.getAsInteger(10, LineLoc.Discriminator); 584 } 585 } 586 587 operator SampleContextFrames() const { return FullContext; } 588 bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } 589 void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } 590 uint32_t getAllAttributes() { return Attributes; } 591 void setAllAttributes(uint32_t A) { Attributes = A; } 592 bool hasState(ContextStateMask S) { return State & (uint32_t)S; } 593 void setState(ContextStateMask S) { State |= (uint32_t)S; } 594 void clearState(ContextStateMask S) { State &= (uint32_t)~S; } 595 bool hasContext() const { return State != UnknownContext; } 596 bool isBaseContext() const { return FullContext.size() == 1; } 597 StringRef getName() const { return Name; } 598 SampleContextFrames getContextFrames() const { return FullContext; } 599 600 static std::string getContextString(SampleContextFrames Context, 601 bool IncludeLeafLineLocation = false) { 602 std::ostringstream OContextStr; 603 for (uint32_t I = 0; I < Context.size(); I++) { 604 if (OContextStr.str().size()) { 605 OContextStr << " @ "; 606 } 607 OContextStr << Context[I].toString(I != Context.size() - 1 || 608 IncludeLeafLineLocation); 609 } 610 return OContextStr.str(); 611 } 612 613 std::string toString() const { 614 if (!hasContext()) 615 return Name.str(); 616 return getContextString(FullContext, false); 617 } 618 619 uint64_t getHashCode() const { 620 return hasContext() ? hash_value(getContextFrames()) 621 : hash_value(getName()); 622 } 623 624 /// Set the name of the function and clear the current context. 
625 void setName(StringRef FunctionName) { 626 Name = FunctionName; 627 FullContext = SampleContextFrames(); 628 State = UnknownContext; 629 } 630 631 void setContext(SampleContextFrames Context, 632 ContextStateMask CState = RawContext) { 633 assert(CState != UnknownContext); 634 FullContext = Context; 635 Name = Context.back().FuncName; 636 State = CState; 637 } 638 639 bool operator==(const SampleContext &That) const { 640 return State == That.State && Name == That.Name && 641 FullContext == That.FullContext; 642 } 643 644 bool operator!=(const SampleContext &That) const { return !(*this == That); } 645 646 bool operator<(const SampleContext &That) const { 647 if (State != That.State) 648 return State < That.State; 649 650 if (!hasContext()) { 651 return Name < That.Name; 652 } 653 654 uint64_t I = 0; 655 while (I < std::min(FullContext.size(), That.FullContext.size())) { 656 auto &Context1 = FullContext[I]; 657 auto &Context2 = That.FullContext[I]; 658 auto V = Context1.FuncName.compare(Context2.FuncName); 659 if (V) 660 return V < 0; 661 if (Context1.Location != Context2.Location) 662 return Context1.Location < Context2.Location; 663 I++; 664 } 665 666 return FullContext.size() < That.FullContext.size(); 667 } 668 669 struct Hash { 670 uint64_t operator()(const SampleContext &Context) const { 671 return Context.getHashCode(); 672 } 673 }; 674 675 bool IsPrefixOf(const SampleContext &That) const { 676 auto ThisContext = FullContext; 677 auto ThatContext = That.FullContext; 678 if (ThatContext.size() < ThisContext.size()) 679 return false; 680 ThatContext = ThatContext.take_front(ThisContext.size()); 681 // Compare Leaf frame first 682 if (ThisContext.back().FuncName != ThatContext.back().FuncName) 683 return false; 684 // Compare leading context 685 return ThisContext.drop_back() == ThatContext.drop_back(); 686 } 687 688 private: 689 /// Mangled name of the function. 
690 StringRef Name; 691 // Full context including calling context and leaf function name 692 SampleContextFrames FullContext; 693 // State of the associated sample profile 694 uint32_t State; 695 // Attribute of the associated sample profile 696 uint32_t Attributes; 697 }; 698 699 static inline hash_code hash_value(const SampleContext &arg) { 700 return arg.hasContext() ? hash_value(arg.getContextFrames()) 701 : hash_value(arg.getName()); 702 } 703 704 class FunctionSamples; 705 class SampleProfileReaderItaniumRemapper; 706 707 using BodySampleMap = std::map<LineLocation, SampleRecord>; 708 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more 709 // memory, which is *very* significant for large profiles. 710 using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>; 711 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; 712 713 /// Representation of the samples collected for a function. 714 /// 715 /// This data structure contains all the collected samples for the body 716 /// of a function. Each sample corresponds to a LineLocation instance 717 /// within the body of the function. 718 class FunctionSamples { 719 public: 720 FunctionSamples() = default; 721 722 void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; 723 void dump() const; 724 725 sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { 726 bool Overflowed; 727 TotalSamples = 728 SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); 729 return Overflowed ? 
sampleprof_error::counter_overflow 730 : sampleprof_error::success; 731 } 732 733 void removeTotalSamples(uint64_t Num) { 734 if (TotalSamples < Num) 735 TotalSamples = 0; 736 else 737 TotalSamples -= Num; 738 } 739 740 void setTotalSamples(uint64_t Num) { TotalSamples = Num; } 741 742 sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { 743 bool Overflowed; 744 TotalHeadSamples = 745 SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); 746 return Overflowed ? sampleprof_error::counter_overflow 747 : sampleprof_error::success; 748 } 749 750 sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, 751 uint64_t Num, uint64_t Weight = 1) { 752 return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( 753 Num, Weight); 754 } 755 756 sampleprof_error addCalledTargetSamples(uint32_t LineOffset, 757 uint32_t Discriminator, 758 StringRef FName, uint64_t Num, 759 uint64_t Weight = 1) { 760 return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( 761 FName, Num, Weight); 762 } 763 764 // Remove a call target and decrease the body sample correspondingly. Return 765 // the number of body samples actually decreased. 766 uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, 767 uint32_t Discriminator, 768 StringRef FName) { 769 uint64_t Count = 0; 770 auto I = BodySamples.find(LineLocation(LineOffset, Discriminator)); 771 if (I != BodySamples.end()) { 772 Count = I->second.removeCalledTarget(FName); 773 Count = I->second.removeSamples(Count); 774 if (!I->second.getSamples()) 775 BodySamples.erase(I); 776 } 777 return Count; 778 } 779 780 sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num, 781 uint64_t Weight = 1) { 782 SampleRecord S; 783 S.addSamples(Num, Weight); 784 return BodySamples[LineLocation(Index, 0)].merge(S, Weight); 785 } 786 787 // Accumulate all call target samples to update the body samples. 
788 void updateCallsiteSamples() { 789 for (auto &I : BodySamples) { 790 uint64_t TargetSamples = I.second.getCallTargetSum(); 791 // It's possible that the body sample count can be greater than the call 792 // target sum. E.g, if some call targets are external targets, they won't 793 // be considered valid call targets, but the body sample count which is 794 // from lbr ranges can actually include them. 795 if (TargetSamples > I.second.getSamples()) 796 I.second.addSamples(TargetSamples - I.second.getSamples()); 797 } 798 } 799 800 // Accumulate all body samples to set total samples. 801 void updateTotalSamples() { 802 setTotalSamples(0); 803 for (const auto &I : BodySamples) 804 addTotalSamples(I.second.getSamples()); 805 806 for (auto &I : CallsiteSamples) { 807 for (auto &CS : I.second) { 808 CS.second.updateTotalSamples(); 809 addTotalSamples(CS.second.getTotalSamples()); 810 } 811 } 812 } 813 814 // Set current context and all callee contexts to be synthetic. 815 void SetContextSynthetic() { 816 Context.setState(SyntheticContext); 817 for (auto &I : CallsiteSamples) { 818 for (auto &CS : I.second) { 819 CS.second.SetContextSynthetic(); 820 } 821 } 822 } 823 824 /// Return the number of samples collected at the given location. 825 /// Each location is specified by \p LineOffset and \p Discriminator. 826 /// If the location is not found in profile, return error. 827 ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, 828 uint32_t Discriminator) const { 829 const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); 830 if (ret == BodySamples.end()) 831 return std::error_code(); 832 return ret->second.getSamples(); 833 } 834 835 /// Returns the call target map collected at a given location. 836 /// Each location is specified by \p LineOffset and \p Discriminator. 837 /// If the location is not found in profile, return error. 
838 ErrorOr<SampleRecord::CallTargetMap> 839 findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { 840 const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); 841 if (ret == BodySamples.end()) 842 return std::error_code(); 843 return ret->second.getCallTargets(); 844 } 845 846 /// Returns the call target map collected at a given location specified by \p 847 /// CallSite. If the location is not found in profile, return error. 848 ErrorOr<SampleRecord::CallTargetMap> 849 findCallTargetMapAt(const LineLocation &CallSite) const { 850 const auto &Ret = BodySamples.find(CallSite); 851 if (Ret == BodySamples.end()) 852 return std::error_code(); 853 return Ret->second.getCallTargets(); 854 } 855 856 /// Return the function samples at the given callsite location. 857 FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { 858 return CallsiteSamples[Loc]; 859 } 860 861 /// Returns the FunctionSamplesMap at the given \p Loc. 862 const FunctionSamplesMap * 863 findFunctionSamplesMapAt(const LineLocation &Loc) const { 864 auto iter = CallsiteSamples.find(Loc); 865 if (iter == CallsiteSamples.end()) 866 return nullptr; 867 return &iter->second; 868 } 869 870 /// Returns a pointer to FunctionSamples at the given callsite location 871 /// \p Loc with callee \p CalleeName. If no callsite can be found, relax 872 /// the restriction to return the FunctionSamples at callsite location 873 /// \p Loc with the maximum total sample count. If \p Remapper is not 874 /// nullptr, use \p Remapper to find FunctionSamples with equivalent name 875 /// as \p CalleeName. 876 const FunctionSamples * 877 findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, 878 SampleProfileReaderItaniumRemapper *Remapper) const; 879 880 bool empty() const { return TotalSamples == 0; } 881 882 /// Return the total number of samples collected inside the function. 
883 uint64_t getTotalSamples() const { return TotalSamples; } 884 885 /// For top-level functions, return the total number of branch samples that 886 /// have the function as the branch target (or 0 otherwise). This is the raw 887 /// data fetched from the profile. This should be equivalent to the sample of 888 /// the first instruction of the symbol. But as we directly get this info for 889 /// raw profile without referring to potentially inaccurate debug info, this 890 /// gives more accurate profile data and is preferred for standalone symbols. 891 uint64_t getHeadSamples() const { return TotalHeadSamples; } 892 893 /// Return an estimate of the sample count of the function entry basic block. 894 /// The function can be either a standalone symbol or an inlined function. 895 /// For Context-Sensitive profiles, this will prefer returning the head 896 /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from 897 /// the function body's samples or callsite samples. 898 uint64_t getHeadSamplesEstimate() const { 899 if (FunctionSamples::ProfileIsCS && getHeadSamples()) { 900 // For CS profile, if we already have more accurate head samples 901 // counted by branch sample from caller, use them as entry samples. 902 return getHeadSamples(); 903 } 904 uint64_t Count = 0; 905 // Use either BodySamples or CallsiteSamples which ever has the smaller 906 // lineno. 907 if (!BodySamples.empty() && 908 (CallsiteSamples.empty() || 909 BodySamples.begin()->first < CallsiteSamples.begin()->first)) 910 Count = BodySamples.begin()->second.getSamples(); 911 else if (!CallsiteSamples.empty()) { 912 // An indirect callsite may be promoted to several inlined direct calls. 913 // We need to get the sum of them. 914 for (const auto &N_FS : CallsiteSamples.begin()->second) 915 Count += N_FS.second.getHeadSamplesEstimate(); 916 } 917 // Return at least 1 if total sample is not 0. 918 return Count ? 
Count : TotalSamples > 0; 919 } 920 921 /// Return all the samples collected in the body of the function. 922 const BodySampleMap &getBodySamples() const { return BodySamples; } 923 924 /// Return all the callsite samples collected in the body of the function. 925 const CallsiteSampleMap &getCallsiteSamples() const { 926 return CallsiteSamples; 927 } 928 929 /// Return the maximum of sample counts in a function body. When SkipCallSite 930 /// is false, which is the default, the return count includes samples in the 931 /// inlined functions. When SkipCallSite is true, the return count only 932 /// considers the body samples. 933 uint64_t getMaxCountInside(bool SkipCallSite = false) const { 934 uint64_t MaxCount = 0; 935 for (const auto &L : getBodySamples()) 936 MaxCount = std::max(MaxCount, L.second.getSamples()); 937 if (SkipCallSite) 938 return MaxCount; 939 for (const auto &C : getCallsiteSamples()) 940 for (const FunctionSamplesMap::value_type &F : C.second) 941 MaxCount = std::max(MaxCount, F.second.getMaxCountInside()); 942 return MaxCount; 943 } 944 945 /// Merge the samples in \p Other into this one. 946 /// Optionally scale samples by \p Weight. 947 sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { 948 sampleprof_error Result = sampleprof_error::success; 949 if (!GUIDToFuncNameMap) 950 GUIDToFuncNameMap = Other.GUIDToFuncNameMap; 951 if (Context.getName().empty()) 952 Context = Other.getContext(); 953 if (FunctionHash == 0) { 954 // Set the function hash code for the target profile. 955 FunctionHash = Other.getFunctionHash(); 956 } else if (FunctionHash != Other.getFunctionHash()) { 957 // The two profiles coming with different valid hash codes indicates 958 // either: 959 // 1. They are same-named static functions from different compilation 960 // units (without using -unique-internal-linkage-names), or 961 // 2. They are really the same function but from different compilations. 
962 // Let's bail out in either case for now, which means one profile is 963 // dropped. 964 return sampleprof_error::hash_mismatch; 965 } 966 967 MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); 968 MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); 969 for (const auto &I : Other.getBodySamples()) { 970 const LineLocation &Loc = I.first; 971 const SampleRecord &Rec = I.second; 972 MergeResult(Result, BodySamples[Loc].merge(Rec, Weight)); 973 } 974 for (const auto &I : Other.getCallsiteSamples()) { 975 const LineLocation &Loc = I.first; 976 FunctionSamplesMap &FSMap = functionSamplesAt(Loc); 977 for (const auto &Rec : I.second) 978 MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight)); 979 } 980 return Result; 981 } 982 983 /// Recursively traverses all children, if the total sample count of the 984 /// corresponding function is no less than \p Threshold, add its corresponding 985 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID 986 /// to \p S. 987 void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, 988 const StringMap<Function *> &SymbolMap, 989 uint64_t Threshold) const { 990 if (TotalSamples <= Threshold) 991 return; 992 auto isDeclaration = [](const Function *F) { 993 return !F || F->isDeclaration(); 994 }; 995 if (isDeclaration(SymbolMap.lookup(getFuncName()))) { 996 // Add to the import list only when it's defined out of module. 997 S.insert(getGUID(getName())); 998 } 999 // Import hot CallTargets, which may not be available in IR because full 1000 // profile annotation cannot be done until backend compilation in ThinLTO. 
1001 for (const auto &BS : BodySamples) 1002 for (const auto &TS : BS.second.getCallTargets()) 1003 if (TS.getValue() > Threshold) { 1004 const Function *Callee = SymbolMap.lookup(getFuncName(TS.getKey())); 1005 if (isDeclaration(Callee)) 1006 S.insert(getGUID(TS.getKey())); 1007 } 1008 for (const auto &CS : CallsiteSamples) 1009 for (const auto &NameFS : CS.second) 1010 NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold); 1011 } 1012 1013 /// Set the name of the function. 1014 void setName(StringRef FunctionName) { Context.setName(FunctionName); } 1015 1016 /// Return the function name. 1017 StringRef getName() const { return Context.getName(); } 1018 1019 /// Return the original function name. 1020 StringRef getFuncName() const { return getFuncName(getName()); } 1021 1022 void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } 1023 1024 uint64_t getFunctionHash() const { return FunctionHash; } 1025 1026 /// Return the canonical name for a function, taking into account 1027 /// suffix elision policy attributes. 1028 static StringRef getCanonicalFnName(const Function &F) { 1029 auto AttrName = "sample-profile-suffix-elision-policy"; 1030 auto Attr = F.getFnAttribute(AttrName).getValueAsString(); 1031 return getCanonicalFnName(F.getName(), Attr); 1032 } 1033 1034 /// Name suffixes which canonicalization should handle to avoid 1035 /// profile mismatch. 1036 static constexpr const char *LLVMSuffix = ".llvm."; 1037 static constexpr const char *PartSuffix = ".part."; 1038 static constexpr const char *UniqSuffix = ".__uniq."; 1039 1040 static StringRef getCanonicalFnName(StringRef FnName, 1041 StringRef Attr = "selected") { 1042 // Note the sequence of the suffixes in the knownSuffixes array matters. 1043 // If suffix "A" is appended after the suffix "B", "A" should be in front 1044 // of "B" in knownSuffixes. 
1045 const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; 1046 if (Attr == "" || Attr == "all") { 1047 return FnName.split('.').first; 1048 } else if (Attr == "selected") { 1049 StringRef Cand(FnName); 1050 for (const auto &Suf : knownSuffixes) { 1051 StringRef Suffix(Suf); 1052 // If the profile contains ".__uniq." suffix, don't strip the 1053 // suffix for names in the IR. 1054 if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) 1055 continue; 1056 auto It = Cand.rfind(Suffix); 1057 if (It == StringRef::npos) 1058 continue; 1059 auto Dit = Cand.rfind('.'); 1060 if (Dit == It + Suffix.size() - 1) 1061 Cand = Cand.substr(0, It); 1062 } 1063 return Cand; 1064 } else if (Attr == "none") { 1065 return FnName; 1066 } else { 1067 assert(false && "internal error: unknown suffix elision policy"); 1068 } 1069 return FnName; 1070 } 1071 1072 /// Translate \p Name into its original name. 1073 /// When profile doesn't use MD5, \p Name needs no translation. 1074 /// When profile uses MD5, \p Name in current FunctionSamples 1075 /// is actually GUID of the original function name. getFuncName will 1076 /// translate \p Name in current FunctionSamples into its original name 1077 /// by looking up in the function map GUIDToFuncNameMap. 1078 /// If the original name doesn't exist in the map, return empty StringRef. 1079 StringRef getFuncName(StringRef Name) const { 1080 if (!UseMD5) 1081 return Name; 1082 1083 assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); 1084 return GUIDToFuncNameMap->lookup(std::stoull(Name.data())); 1085 } 1086 1087 /// Returns the line offset to the start line of the subprogram. 1088 /// We assume that a single function will not exceed 65535 LOC. 1089 static unsigned getOffset(const DILocation *DIL); 1090 1091 /// Returns a unique call site identifier for a given debug location of a call 1092 /// instruction. 
/// This is a wrapper of two scenarios, the probe-based profile and
/// regular profile, to hide implementation details from the sample loader and
/// the context tracker.
static LineLocation getCallSiteIdentifier(const DILocation *DIL,
                                          bool ProfileIsFS = false);

/// Returns a unique hash code for a combination of a callsite location and
/// the callee function name.
static uint64_t getCallSiteHash(StringRef CalleeName,
                                const LineLocation &Callsite);

/// Get the FunctionSamples of the inline instance where DIL originates
/// from.
///
/// The FunctionSamples of the instruction (Machine or IR) associated to
/// \p DIL is the inlined instance in which that instruction is coming from.
/// We traverse the inline stack of that instruction, and match it with the
/// tree nodes in the profile.
///
/// \returns the FunctionSamples pointer to the inlined instance.
/// If \p Remapper is not nullptr, it will be used to find matching
/// FunctionSamples with not exactly the same but equivalent name.
const FunctionSamples *findFunctionSamples(
    const DILocation *DIL,
    SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;

// NOTE(review): presumably set when the loaded profile is probe-based --
// confirm against the profile reader.
static bool ProfileIsProbeBased;

// Consulted by getHeadSamplesEstimate() to prefer head samples for
// context-sensitive (CS) profiles.
static bool ProfileIsCS;

// NOTE(review): presumably marks a profile that carries pre-inliner
// decisions -- confirm against the profile reader.
static bool ProfileIsPreInlined;

/// Return the calling context of this profile. The reference is mutable even
/// though the method is const, because the Context member is declared
/// mutable.
SampleContext &getContext() const { return Context; }

/// Replace the calling context of this profile with \p FContext.
void setContext(const SampleContext &FContext) { Context = FContext; }

/// Whether the profile uses MD5 to represent string.
static bool UseMD5;

/// Whether the profile contains any ".__uniq." suffix in a name.
static bool HasUniqSuffix;

/// If this profile uses flow sensitive discriminators.
static bool ProfileIsFS;

/// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
/// all the function symbols defined or declared in current module.
1139 DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr; 1140 1141 // Assume the input \p Name is a name coming from FunctionSamples itself. 1142 // If UseMD5 is true, the name is already a GUID and we 1143 // don't want to return the GUID of GUID. 1144 static uint64_t getGUID(StringRef Name) { 1145 return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name); 1146 } 1147 1148 // Find all the names in the current FunctionSamples including names in 1149 // all the inline instances and names of call targets. 1150 void findAllNames(DenseSet<StringRef> &NameSet) const; 1151 1152 private: 1153 /// CFG hash value for the function. 1154 uint64_t FunctionHash = 0; 1155 1156 /// Calling context for function profile 1157 mutable SampleContext Context; 1158 1159 /// Total number of samples collected inside this function. 1160 /// 1161 /// Samples are cumulative, they include all the samples collected 1162 /// inside this function and all its inlined callees. 1163 uint64_t TotalSamples = 0; 1164 1165 /// Total number of samples collected at the head of the function. 1166 /// This is an approximation of the number of calls made to this function 1167 /// at runtime. 1168 uint64_t TotalHeadSamples = 0; 1169 1170 /// Map instruction locations to collected samples. 1171 /// 1172 /// Each entry in this map contains the number of samples 1173 /// collected at the corresponding line offset. All line locations 1174 /// are an offset from the start of the function. 1175 BodySampleMap BodySamples; 1176 1177 /// Map call sites to collected samples for the called function. 1178 /// 1179 /// Each entry in this map corresponds to all the samples 1180 /// collected for the inlined function call at the given 1181 /// location. For example, given: 1182 /// 1183 /// void foo() { 1184 /// 1 bar(); 1185 /// ... 1186 /// 8 baz(); 1187 /// } 1188 /// 1189 /// If the bar() and baz() calls were inlined inside foo(), this 1190 /// map will contain two entries. 
One for all the samples collected 1191 /// in the call to bar() at line offset 1, the other for all the samples 1192 /// collected in the call to baz() at line offset 8. 1193 CallsiteSampleMap CallsiteSamples; 1194 }; 1195 1196 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); 1197 1198 using SampleProfileMap = 1199 std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>; 1200 1201 using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>; 1202 1203 void sortFuncProfiles(const SampleProfileMap &ProfileMap, 1204 std::vector<NameFunctionSamples> &SortedProfiles); 1205 1206 /// Sort a LocationT->SampleT map by LocationT. 1207 /// 1208 /// It produces a sorted list of <LocationT, SampleT> records by ascending 1209 /// order of LocationT. 1210 template <class LocationT, class SampleT> class SampleSorter { 1211 public: 1212 using SamplesWithLoc = std::pair<const LocationT, SampleT>; 1213 using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>; 1214 1215 SampleSorter(const std::map<LocationT, SampleT> &Samples) { 1216 for (const auto &I : Samples) 1217 V.push_back(&I); 1218 llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) { 1219 return A->first < B->first; 1220 }); 1221 } 1222 1223 const SamplesWithLocList &get() const { return V; } 1224 1225 private: 1226 SamplesWithLocList V; 1227 }; 1228 1229 /// SampleContextTrimmer impelements helper functions to trim, merge cold 1230 /// context profiles. It also supports context profile canonicalization to make 1231 /// sure ProfileMap's key is consistent with FunctionSample's name/context. 1232 class SampleContextTrimmer { 1233 public: 1234 SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; 1235 // Trim and merge cold context profile when requested. TrimBaseProfileOnly 1236 // should only be effective when TrimColdContext is true. 
On top of 1237 // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all 1238 // cold profiles or only cold base profiles. Trimming base profiles only is 1239 // mainly to honor the preinliner decsion. Note that when MergeColdContext is 1240 // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will 1241 // be ignored. 1242 void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, 1243 bool TrimColdContext, 1244 bool MergeColdContext, 1245 uint32_t ColdContextFrameLength, 1246 bool TrimBaseProfileOnly); 1247 // Canonicalize context profile name and attributes. 1248 void canonicalizeContextProfiles(); 1249 1250 private: 1251 SampleProfileMap &ProfileMap; 1252 }; 1253 1254 // CSProfileConverter converts a full context-sensitive flat sample profile into 1255 // a nested context-sensitive sample profile. 1256 class CSProfileConverter { 1257 public: 1258 CSProfileConverter(SampleProfileMap &Profiles); 1259 void convertProfiles(); 1260 struct FrameNode { 1261 FrameNode(StringRef FName = StringRef(), 1262 FunctionSamples *FSamples = nullptr, 1263 LineLocation CallLoc = {0, 0}) 1264 : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; 1265 1266 // Map line+discriminator location to child frame 1267 std::map<uint64_t, FrameNode> AllChildFrames; 1268 // Function name for current frame 1269 StringRef FuncName; 1270 // Function Samples for current frame 1271 FunctionSamples *FuncSamples; 1272 // Callsite location in parent context 1273 LineLocation CallSiteLoc; 1274 1275 FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, 1276 StringRef CalleeName); 1277 }; 1278 1279 private: 1280 // Nest all children profiles into the profile of Node. 
1281 void convertProfiles(FrameNode &Node); 1282 FrameNode *getOrCreateContextPath(const SampleContext &Context); 1283 1284 SampleProfileMap &ProfileMap; 1285 FrameNode RootFrame; 1286 }; 1287 1288 /// ProfileSymbolList records the list of function symbols shown up 1289 /// in the binary used to generate the profile. It is useful to 1290 /// to discriminate a function being so cold as not to shown up 1291 /// in the profile and a function newly added. 1292 class ProfileSymbolList { 1293 public: 1294 /// copy indicates whether we need to copy the underlying memory 1295 /// for the input Name. 1296 void add(StringRef Name, bool copy = false) { 1297 if (!copy) { 1298 Syms.insert(Name); 1299 return; 1300 } 1301 Syms.insert(Name.copy(Allocator)); 1302 } 1303 1304 bool contains(StringRef Name) { return Syms.count(Name); } 1305 1306 void merge(const ProfileSymbolList &List) { 1307 for (auto Sym : List.Syms) 1308 add(Sym, true); 1309 } 1310 1311 unsigned size() { return Syms.size(); } 1312 1313 void setToCompress(bool TC) { ToCompress = TC; } 1314 bool toCompress() { return ToCompress; } 1315 1316 std::error_code read(const uint8_t *Data, uint64_t ListSize); 1317 std::error_code write(raw_ostream &OS); 1318 void dump(raw_ostream &OS = dbgs()) const; 1319 1320 private: 1321 // Determine whether or not to compress the symbol list when 1322 // writing it into profile. The variable is unused when the symbol 1323 // list is read from an existing profile. 1324 bool ToCompress = false; 1325 DenseSet<StringRef> Syms; 1326 BumpPtrAllocator Allocator; 1327 }; 1328 1329 } // end namespace sampleprof 1330 1331 using namespace sampleprof; 1332 // Provide DenseMapInfo for SampleContext. 
1333 template <> struct DenseMapInfo<SampleContext> { 1334 static inline SampleContext getEmptyKey() { return SampleContext(); } 1335 1336 static inline SampleContext getTombstoneKey() { return SampleContext("@"); } 1337 1338 static unsigned getHashValue(const SampleContext &Val) { 1339 return Val.getHashCode(); 1340 } 1341 1342 static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { 1343 return LHS == RHS; 1344 } 1345 }; 1346 1347 // Prepend "__uniq" before the hash for tools like profilers to understand 1348 // that this symbol is of internal linkage type. The "__uniq" is the 1349 // pre-determined prefix that is used to tell tools that this symbol was 1350 // created with -funique-internal-linakge-symbols and the tools can strip or 1351 // keep the prefix as needed. 1352 inline std::string getUniqueInternalLinkagePostfix(const StringRef &FName) { 1353 llvm::MD5 Md5; 1354 Md5.update(FName); 1355 llvm::MD5::MD5Result R; 1356 Md5.final(R); 1357 SmallString<32> Str; 1358 llvm::MD5::stringifyResult(R, Str); 1359 // Convert MD5hash to Decimal. Demangler suffixes can either contain 1360 // numbers or characters but not both. 1361 llvm::APInt IntHash(128, Str.str(), 16); 1362 return toString(IntHash, /* Radix = */ 10, /* Signed = */ false) 1363 .insert(0, FunctionSamples::UniqSuffix); 1364 } 1365 1366 } // end namespace llvm 1367 1368 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H 1369