//===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Instrumentation-based profiling data is generated by instrumented
// binaries through library functions in compiler-rt, and read by the clang
// frontend to feed PGO.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_PROFILEDATA_INSTRPROF_H
#define LLVM_PROFILEDATA_INSTRPROF_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProfData.inc"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <list>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

namespace llvm {

// Forward declarations of types that this header only uses by reference or
// pointer.
class Function;
class GlobalVariable;
struct InstrProfRecord;
class InstrProfSymtab;
class Instruction;
class MDNode;
class Module;

/// Kinds of profile sections.
///
/// The enumerators are generated with an X-macro: INSTR_PROF_SECT_ENTRY is
/// defined to expand to just its \c Kind argument, and InstrProfData.inc is
/// then included so that every INSTR_PROF_SECT_ENTRY row in it contributes one
/// enumerator.
enum InstrProfSectKind {
#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/ProfileData/InstrProfData.inc"
};

/// Return the name of
the profile section corresponding to \p IPSK. 63 /// 64 /// The name of the section depends on the object format type \p OF. If 65 /// \p AddSegmentInfo is true, a segment prefix and additional linker hints may 66 /// be added to the section name (this is the default). 67 std::string getInstrProfSectionName(InstrProfSectKind IPSK, 68 Triple::ObjectFormatType OF, 69 bool AddSegmentInfo = true); 70 71 /// Return the name profile runtime entry point to do value profiling 72 /// for a given site. 73 inline StringRef getInstrProfValueProfFuncName() { 74 return INSTR_PROF_VALUE_PROF_FUNC_STR; 75 } 76 77 /// Return the name profile runtime entry point to do memop size value 78 /// profiling. 79 inline StringRef getInstrProfValueProfMemOpFuncName() { 80 return INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR; 81 } 82 83 /// Return the name prefix of variables containing instrumented function names. 84 inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } 85 86 /// Return the name prefix of variables containing per-function control data. 87 inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } 88 89 /// Return the name prefix of profile counter variables. 90 inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } 91 92 /// Return the name prefix of value profile variables. 93 inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } 94 95 /// Return the name of value profile node array variables: 96 inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } 97 98 /// Return the name of the variable holding the strings (possibly compressed) 99 /// of all function's PGO names. 100 inline StringRef getInstrProfNamesVarName() { 101 return "__llvm_prf_nm"; 102 } 103 104 /// Return the name of a covarage mapping variable (internal linkage) 105 /// for each instrumented source module. Such variables are allocated 106 /// in the __llvm_covmap section. 
107 inline StringRef getCoverageMappingVarName() { 108 return "__llvm_coverage_mapping"; 109 } 110 111 /// Return the name of the internal variable recording the array 112 /// of PGO name vars referenced by the coverage mapping. The owning 113 /// functions of those names are not emitted by FE (e.g, unused inline 114 /// functions.) 115 inline StringRef getCoverageUnusedNamesVarName() { 116 return "__llvm_coverage_names"; 117 } 118 119 /// Return the name of function that registers all the per-function control 120 /// data at program startup time by calling __llvm_register_function. This 121 /// function has internal linkage and is called by __llvm_profile_init 122 /// runtime method. This function is not generated for these platforms: 123 /// Darwin, Linux, and FreeBSD. 124 inline StringRef getInstrProfRegFuncsName() { 125 return "__llvm_profile_register_functions"; 126 } 127 128 /// Return the name of the runtime interface that registers per-function control 129 /// data for one instrumented function. 130 inline StringRef getInstrProfRegFuncName() { 131 return "__llvm_profile_register_function"; 132 } 133 134 /// Return the name of the runtime interface that registers the PGO name strings. 135 inline StringRef getInstrProfNamesRegFuncName() { 136 return "__llvm_profile_register_names_function"; 137 } 138 139 /// Return the name of the runtime initialization method that is generated by 140 /// the compiler. The function calls __llvm_profile_register_functions and 141 /// __llvm_profile_override_default_filename functions if needed. This function 142 /// has internal linkage and invoked at startup time via init_array. 143 inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; } 144 145 /// Return the name of the hook variable defined in profile runtime library. 146 /// A reference to the variable causes the linker to link in the runtime 147 /// initialization module (which defines the hook variable). 
148 inline StringRef getInstrProfRuntimeHookVarName() { 149 return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR); 150 } 151 152 /// Return the name of the compiler generated function that references the 153 /// runtime hook variable. The function is a weak global. 154 inline StringRef getInstrProfRuntimeHookVarUseFuncName() { 155 return "__llvm_profile_runtime_user"; 156 } 157 158 inline StringRef getInstrProfCounterBiasVarName() { 159 return "__llvm_profile_counter_bias"; 160 } 161 162 /// Return the marker used to separate PGO names during serialization. 163 inline StringRef getInstrProfNameSeparator() { return "\01"; } 164 165 /// Return the modified name for function \c F suitable to be 166 /// used the key for profile lookup. Variable \c InLTO indicates if this 167 /// is called in LTO optimization passes. 168 std::string getPGOFuncName(const Function &F, bool InLTO = false, 169 uint64_t Version = INSTR_PROF_INDEX_VERSION); 170 171 /// Return the modified name for a function suitable to be 172 /// used the key for profile lookup. The function's original 173 /// name is \c RawFuncName and has linkage of type \c Linkage. 174 /// The function is defined in module \c FileName. 175 std::string getPGOFuncName(StringRef RawFuncName, 176 GlobalValue::LinkageTypes Linkage, 177 StringRef FileName, 178 uint64_t Version = INSTR_PROF_INDEX_VERSION); 179 180 /// Return the name of the global variable used to store a function 181 /// name in PGO instrumentation. \c FuncName is the name of the function 182 /// returned by the \c getPGOFuncName call. 183 std::string getPGOFuncNameVarName(StringRef FuncName, 184 GlobalValue::LinkageTypes Linkage); 185 186 /// Create and return the global variable for function name used in PGO 187 /// instrumentation. \c FuncName is the name of the function returned 188 /// by \c getPGOFuncName call. 
GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName);

/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function
/// returned by \c getPGOFuncName call, \c M is the owning module,
/// and \c Linkage is the linkage of the instrumented function.
GlobalVariable *createPGOFuncNameVar(Module &M,
                                     GlobalValue::LinkageTypes Linkage,
                                     StringRef PGOFuncName);

/// Return the initializer in string of the PGO name var \c NameVar.
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);

/// Given a PGO function name, remove the filename prefix and return
/// the original (static) function name.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
                                   StringRef FileName = "<unknown>");

/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string \c Result that is ready to be
/// serialized. The \c Result string is comprised of three fields:
/// The first field is the length of the uncompressed strings, and
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128. If \c doCompression is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
                                bool doCompression, std::string &Result);

/// Produce \c Result string with the same format described above. The input
/// is a vector of PGO function name variables that are referenced.
Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
                                std::string &Result, bool doCompression = true);

/// \c NameStrings is a string composed of one or more sub-strings encoded in
/// the format described above. The substrings are separated by 0 or more zero
/// bytes. This method decodes the string and populates the \c Symtab.
Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);

/// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being
/// set in IR PGO compilation.
bool isIRPGOFlagSet(const Module *M);

/// Check if we can safely rename this Comdat function. Instances of the same
/// comdat function may have different control flows thus can not share the
/// same counter variable.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false);

/// Kinds of value profiling.
///
/// The enumerators are generated with an X-macro: each VALUE_PROF_KIND row of
/// InstrProfData.inc expands to `Enumerator = Value,`.
enum InstrProfValueKind : uint32_t {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
#include "llvm/ProfileData/InstrProfData.inc"
};

/// Get the value profile data for value site \p SiteIndx from \p InstrProfR
/// and annotate the instruction \p Inst with the value profile meta data.
/// Annotate up to \p MaxMDCount (default 3) number of records per value site.
void annotateValueSite(Module &M, Instruction &Inst,
                       const InstrProfRecord &InstrProfR,
                       InstrProfValueKind ValueKind, uint32_t SiteIndx,
                       uint32_t MaxMDCount = 3);

/// Same as the above interface but using an ArrayRef, as well as \p Sum.
void annotateValueSite(Module &M, Instruction &Inst,
                       ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
                       InstrProfValueKind ValueKind, uint32_t MaxMDCount);

/// Extract the value profile data from \p Inst which is annotated with
/// value profile meta data. Return false if there is no value data annotated,
/// otherwise return true.
259 bool getValueProfDataFromInst(const Instruction &Inst, 260 InstrProfValueKind ValueKind, 261 uint32_t MaxNumValueData, 262 InstrProfValueData ValueData[], 263 uint32_t &ActualNumValueData, uint64_t &TotalC); 264 265 inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } 266 267 /// Return the PGOFuncName meta data associated with a function. 268 MDNode *getPGOFuncNameMetadata(const Function &F); 269 270 /// Create the PGOFuncName meta data if PGOFuncName is different from 271 /// function's raw name. This should only apply to internal linkage functions 272 /// declared by users only. 273 void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); 274 275 /// Check if we can use Comdat for profile variables. This will eliminate 276 /// the duplicated profile variables for Comdat functions. 277 bool needsComdatForCounter(const Function &F, const Module &M); 278 279 const std::error_category &instrprof_category(); 280 281 enum class instrprof_error { 282 success = 0, 283 eof, 284 unrecognized_format, 285 bad_magic, 286 bad_header, 287 unsupported_version, 288 unsupported_hash_type, 289 too_large, 290 truncated, 291 malformed, 292 unknown_function, 293 hash_mismatch, 294 count_mismatch, 295 counter_overflow, 296 value_site_count_mismatch, 297 compress_failed, 298 uncompress_failed, 299 empty_raw_profile, 300 zlib_unavailable 301 }; 302 303 inline std::error_code make_error_code(instrprof_error E) { 304 return std::error_code(static_cast<int>(E), instrprof_category()); 305 } 306 307 class InstrProfError : public ErrorInfo<InstrProfError> { 308 public: 309 InstrProfError(instrprof_error Err) : Err(Err) { 310 assert(Err != instrprof_error::success && "Not an error"); 311 } 312 313 std::string message() const override; 314 315 void log(raw_ostream &OS) const override { OS << message(); } 316 317 std::error_code convertToErrorCode() const override { 318 return make_error_code(Err); 319 } 320 321 instrprof_error get() const { return Err; } 322 
323 /// Consume an Error and return the raw enum value contained within it. The 324 /// Error must either be a success value, or contain a single InstrProfError. 325 static instrprof_error take(Error E) { 326 auto Err = instrprof_error::success; 327 handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) { 328 assert(Err == instrprof_error::success && "Multiple errors encountered"); 329 Err = IPE.get(); 330 }); 331 return Err; 332 } 333 334 static char ID; 335 336 private: 337 instrprof_error Err; 338 }; 339 340 class SoftInstrProfErrors { 341 /// Count the number of soft instrprof_errors encountered and keep track of 342 /// the first such error for reporting purposes. 343 344 /// The first soft error encountered. 345 instrprof_error FirstError = instrprof_error::success; 346 347 /// The number of hash mismatches. 348 unsigned NumHashMismatches = 0; 349 350 /// The number of count mismatches. 351 unsigned NumCountMismatches = 0; 352 353 /// The number of counter overflows. 354 unsigned NumCounterOverflows = 0; 355 356 /// The number of value site count mismatches. 357 unsigned NumValueSiteCountMismatches = 0; 358 359 public: 360 SoftInstrProfErrors() = default; 361 362 ~SoftInstrProfErrors() { 363 assert(FirstError == instrprof_error::success && 364 "Unchecked soft error encountered"); 365 } 366 367 /// Track a soft error (\p IE) and increment its associated counter. 368 void addError(instrprof_error IE); 369 370 /// Get the number of hash mismatches. 371 unsigned getNumHashMismatches() const { return NumHashMismatches; } 372 373 /// Get the number of count mismatches. 374 unsigned getNumCountMismatches() const { return NumCountMismatches; } 375 376 /// Get the number of counter overflows. 377 unsigned getNumCounterOverflows() const { return NumCounterOverflows; } 378 379 /// Get the number of value site count mismatches. 
380 unsigned getNumValueSiteCountMismatches() const { 381 return NumValueSiteCountMismatches; 382 } 383 384 /// Return the first encountered error and reset FirstError to a success 385 /// value. 386 Error takeError() { 387 if (FirstError == instrprof_error::success) 388 return Error::success(); 389 auto E = make_error<InstrProfError>(FirstError); 390 FirstError = instrprof_error::success; 391 return E; 392 } 393 }; 394 395 namespace object { 396 397 class SectionRef; 398 399 } // end namespace object 400 401 namespace IndexedInstrProf { 402 403 uint64_t ComputeHash(StringRef K); 404 405 } // end namespace IndexedInstrProf 406 407 /// A symbol table used for function PGO name look-up with keys 408 /// (such as pointers, md5hash values) to the function. A function's 409 /// PGO name or name's md5hash are used in retrieving the profile 410 /// data of the function. See \c getPGOFuncName() method for details 411 /// on how PGO name is formed. 412 class InstrProfSymtab { 413 public: 414 using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>; 415 416 private: 417 StringRef Data; 418 uint64_t Address = 0; 419 // Unique name strings. 420 StringSet<> NameTab; 421 // A map from MD5 keys to function name strings. 422 std::vector<std::pair<uint64_t, StringRef>> MD5NameMap; 423 // A map from MD5 keys to function define. We only populate this map 424 // when build the Symtab from a Module. 425 std::vector<std::pair<uint64_t, Function *>> MD5FuncMap; 426 // A map from function runtime address to function name MD5 hash. 427 // This map is only populated and used by raw instr profile reader. 428 AddrHashMap AddrToMD5Map; 429 bool Sorted = false; 430 431 static StringRef getExternalSymbol() { 432 return "** External Symbol **"; 433 } 434 435 // If the symtab is created by a series of calls to \c addFuncName, \c 436 // finalizeSymtab needs to be called before looking up function names. 
437 // This is required because the underlying map is a vector (for space 438 // efficiency) which needs to be sorted. 439 inline void finalizeSymtab(); 440 441 public: 442 InstrProfSymtab() = default; 443 444 /// Create InstrProfSymtab from an object file section which 445 /// contains function PGO names. When section may contain raw 446 /// string data or string data in compressed form. This method 447 /// only initialize the symtab with reference to the data and 448 /// the section base address. The decompression will be delayed 449 /// until before it is used. See also \c create(StringRef) method. 450 Error create(object::SectionRef &Section); 451 452 /// This interface is used by reader of CoverageMapping test 453 /// format. 454 inline Error create(StringRef D, uint64_t BaseAddr); 455 456 /// \c NameStrings is a string composed of one of more sub-strings 457 /// encoded in the format described in \c collectPGOFuncNameStrings. 458 /// This method is a wrapper to \c readPGOFuncNameStrings method. 459 inline Error create(StringRef NameStrings); 460 461 /// A wrapper interface to populate the PGO symtab with functions 462 /// decls from module \c M. This interface is used by transformation 463 /// passes such as indirect function call promotion. Variable \c InLTO 464 /// indicates if this is called from LTO optimization passes. 465 Error create(Module &M, bool InLTO = false); 466 467 /// Create InstrProfSymtab from a set of names iteratable from 468 /// \p IterRange. This interface is used by IndexedProfReader. 469 template <typename NameIterRange> Error create(const NameIterRange &IterRange); 470 471 /// Update the symtab by adding \p FuncName to the table. This interface 472 /// is used by the raw and text profile readers. 
473 Error addFuncName(StringRef FuncName) { 474 if (FuncName.empty()) 475 return make_error<InstrProfError>(instrprof_error::malformed); 476 auto Ins = NameTab.insert(FuncName); 477 if (Ins.second) { 478 MD5NameMap.push_back(std::make_pair( 479 IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); 480 Sorted = false; 481 } 482 return Error::success(); 483 } 484 485 /// Map a function address to its name's MD5 hash. This interface 486 /// is only used by the raw profiler reader. 487 void mapAddress(uint64_t Addr, uint64_t MD5Val) { 488 AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); 489 } 490 491 /// Return a function's hash, or 0, if the function isn't in this SymTab. 492 uint64_t getFunctionHashFromAddress(uint64_t Address); 493 494 /// Return function's PGO name from the function name's symbol 495 /// address in the object file. If an error occurs, return 496 /// an empty string. 497 StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); 498 499 /// Return function's PGO name from the name's md5 hash value. 500 /// If not found, return an empty string. 501 inline StringRef getFuncName(uint64_t FuncMD5Hash); 502 503 /// Just like getFuncName, except that it will return a non-empty StringRef 504 /// if the function is external to this symbol table. All such cases 505 /// will be represented using the same StringRef value. 506 inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash); 507 508 /// True if Symbol is the value used to represent external symbols. 509 static bool isExternalSymbol(const StringRef &Symbol) { 510 return Symbol == InstrProfSymtab::getExternalSymbol(); 511 } 512 513 /// Return function from the name's md5 hash. Return nullptr if not found. 514 inline Function *getFunction(uint64_t FuncMD5Hash); 515 516 /// Return the function's original assembly name by stripping off 517 /// the prefix attached (to symbols with priviate linkage). For 518 /// global functions, it returns the same string as getFuncName. 
519 inline StringRef getOrigFuncName(uint64_t FuncMD5Hash); 520 521 /// Return the name section data. 522 inline StringRef getNameData() const { return Data; } 523 }; 524 525 Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { 526 Data = D; 527 Address = BaseAddr; 528 return Error::success(); 529 } 530 531 Error InstrProfSymtab::create(StringRef NameStrings) { 532 return readPGOFuncNameStrings(NameStrings, *this); 533 } 534 535 template <typename NameIterRange> 536 Error InstrProfSymtab::create(const NameIterRange &IterRange) { 537 for (auto Name : IterRange) 538 if (Error E = addFuncName(Name)) 539 return E; 540 541 finalizeSymtab(); 542 return Error::success(); 543 } 544 545 void InstrProfSymtab::finalizeSymtab() { 546 if (Sorted) 547 return; 548 llvm::sort(MD5NameMap, less_first()); 549 llvm::sort(MD5FuncMap, less_first()); 550 llvm::sort(AddrToMD5Map, less_first()); 551 AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), 552 AddrToMD5Map.end()); 553 Sorted = true; 554 } 555 556 StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) { 557 StringRef ret = getFuncName(FuncMD5Hash); 558 if (ret.empty()) 559 return InstrProfSymtab::getExternalSymbol(); 560 return ret; 561 } 562 563 StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { 564 finalizeSymtab(); 565 auto Result = llvm::lower_bound(MD5NameMap, FuncMD5Hash, 566 [](const std::pair<uint64_t, StringRef> &LHS, 567 uint64_t RHS) { return LHS.first < RHS; }); 568 if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash) 569 return Result->second; 570 return StringRef(); 571 } 572 573 Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { 574 finalizeSymtab(); 575 auto Result = llvm::lower_bound(MD5FuncMap, FuncMD5Hash, 576 [](const std::pair<uint64_t, Function *> &LHS, 577 uint64_t RHS) { return LHS.first < RHS; }); 578 if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash) 579 return Result->second; 580 return nullptr; 581 } 
582 583 // See also getPGOFuncName implementation. These two need to be 584 // matched. 585 StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) { 586 StringRef PGOName = getFuncName(FuncMD5Hash); 587 size_t S = PGOName.find_first_of(':'); 588 if (S == StringRef::npos) 589 return PGOName; 590 return PGOName.drop_front(S + 1); 591 } 592 593 // To store the sums of profile count values, or the percentage of 594 // the sums of the total count values. 595 struct CountSumOrPercent { 596 uint64_t NumEntries; 597 double CountSum; 598 double ValueCounts[IPVK_Last - IPVK_First + 1]; 599 CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {} 600 void reset() { 601 NumEntries = 0; 602 CountSum = 0.0f; 603 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) 604 ValueCounts[I] = 0.0f; 605 } 606 }; 607 608 // Function level or program level overlap information. 609 struct OverlapStats { 610 enum OverlapStatsLevel { ProgramLevel, FunctionLevel }; 611 // Sum of the total count values for the base profile. 612 CountSumOrPercent Base; 613 // Sum of the total count values for the test profile. 614 CountSumOrPercent Test; 615 // Overlap lap score. Should be in range of [0.0f to 1.0f]. 
616 CountSumOrPercent Overlap; 617 CountSumOrPercent Mismatch; 618 CountSumOrPercent Unique; 619 OverlapStatsLevel Level; 620 const std::string *BaseFilename; 621 const std::string *TestFilename; 622 StringRef FuncName; 623 uint64_t FuncHash; 624 bool Valid; 625 626 OverlapStats(OverlapStatsLevel L = ProgramLevel) 627 : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0), 628 Valid(false) {} 629 630 void dump(raw_fd_ostream &OS) const; 631 632 void setFuncInfo(StringRef Name, uint64_t Hash) { 633 FuncName = Name; 634 FuncHash = Hash; 635 } 636 637 Error accumulateCounts(const std::string &BaseFilename, 638 const std::string &TestFilename, bool IsCS); 639 void addOneMismatch(const CountSumOrPercent &MismatchFunc); 640 void addOneUnique(const CountSumOrPercent &UniqueFunc); 641 642 static inline double score(uint64_t Val1, uint64_t Val2, double Sum1, 643 double Sum2) { 644 if (Sum1 < 1.0f || Sum2 < 1.0f) 645 return 0.0f; 646 return std::min(Val1 / Sum1, Val2 / Sum2); 647 } 648 }; 649 650 // This is used to filter the functions whose overlap information 651 // to be output. 652 struct OverlapFuncFilters { 653 uint64_t ValueCutoff; 654 const std::string NameFilter; 655 }; 656 657 struct InstrProfValueSiteRecord { 658 /// Value profiling data pairs at a given value site. 659 std::list<InstrProfValueData> ValueData; 660 661 InstrProfValueSiteRecord() { ValueData.clear(); } 662 template <class InputIterator> 663 InstrProfValueSiteRecord(InputIterator F, InputIterator L) 664 : ValueData(F, L) {} 665 666 /// Sort ValueData ascending by Value 667 void sortByTargetValues() { 668 ValueData.sort( 669 [](const InstrProfValueData &left, const InstrProfValueData &right) { 670 return left.Value < right.Value; 671 }); 672 } 673 /// Sort ValueData Descending by Count 674 inline void sortByCount(); 675 676 /// Merge data from another InstrProfValueSiteRecord 677 /// Optionally scale merged counts by \p Weight. 
678 void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, 679 function_ref<void(instrprof_error)> Warn); 680 /// Scale up value profile data counts by N (Numerator) / D (Denominator). 681 void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn); 682 683 /// Compute the overlap b/w this record and Input record. 684 void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind, 685 OverlapStats &Overlap, OverlapStats &FuncLevelOverlap); 686 }; 687 688 /// Profiling information for a single function. 689 struct InstrProfRecord { 690 std::vector<uint64_t> Counts; 691 692 InstrProfRecord() = default; 693 InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {} 694 InstrProfRecord(InstrProfRecord &&) = default; 695 InstrProfRecord(const InstrProfRecord &RHS) 696 : Counts(RHS.Counts), 697 ValueData(RHS.ValueData 698 ? std::make_unique<ValueProfData>(*RHS.ValueData) 699 : nullptr) {} 700 InstrProfRecord &operator=(InstrProfRecord &&) = default; 701 InstrProfRecord &operator=(const InstrProfRecord &RHS) { 702 Counts = RHS.Counts; 703 if (!RHS.ValueData) { 704 ValueData = nullptr; 705 return *this; 706 } 707 if (!ValueData) 708 ValueData = std::make_unique<ValueProfData>(*RHS.ValueData); 709 else 710 *ValueData = *RHS.ValueData; 711 return *this; 712 } 713 714 /// Return the number of value profile kinds with non-zero number 715 /// of profile sites. 716 inline uint32_t getNumValueKinds() const; 717 /// Return the number of instrumented sites for ValueKind. 718 inline uint32_t getNumValueSites(uint32_t ValueKind) const; 719 720 /// Return the total number of ValueData for ValueKind. 721 inline uint32_t getNumValueData(uint32_t ValueKind) const; 722 723 /// Return the number of value data collected for ValueKind at profiling 724 /// site: Site. 725 inline uint32_t getNumValueDataForSite(uint32_t ValueKind, 726 uint32_t Site) const; 727 728 /// Return the array of profiled values at \p Site. 
If \p TotalC 729 /// is not null, the total count of all target values at this site 730 /// will be stored in \c *TotalC. 731 inline std::unique_ptr<InstrProfValueData[]> 732 getValueForSite(uint32_t ValueKind, uint32_t Site, 733 uint64_t *TotalC = nullptr) const; 734 735 /// Get the target value/counts of kind \p ValueKind collected at site 736 /// \p Site and store the result in array \p Dest. Return the total 737 /// counts of all target values at this site. 738 inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, 739 uint32_t Site) const; 740 741 /// Reserve space for NumValueSites sites. 742 inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); 743 744 /// Add ValueData for ValueKind at value Site. 745 void addValueData(uint32_t ValueKind, uint32_t Site, 746 InstrProfValueData *VData, uint32_t N, 747 InstrProfSymtab *SymTab); 748 749 /// Merge the counts in \p Other into this one. 750 /// Optionally scale merged counts by \p Weight. 751 void merge(InstrProfRecord &Other, uint64_t Weight, 752 function_ref<void(instrprof_error)> Warn); 753 754 /// Scale up profile counts (including value profile data) by 755 /// a factor of (N / D). 756 void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn); 757 758 /// Sort value profile data (per site) by count. 759 void sortValueData() { 760 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 761 for (auto &SR : getValueSitesForKind(Kind)) 762 SR.sortByCount(); 763 } 764 765 /// Clear value data entries and edge counters. 766 void Clear() { 767 Counts.clear(); 768 clearValueData(); 769 } 770 771 /// Clear value data entries 772 void clearValueData() { ValueData = nullptr; } 773 774 /// Compute the sums of all counts and store in Sum. 775 void accumulateCounts(CountSumOrPercent &Sum) const; 776 777 /// Compute the overlap b/w this IntrprofRecord and Other. 
778 void overlap(InstrProfRecord &Other, OverlapStats &Overlap, 779 OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff); 780 781 /// Compute the overlap of value profile counts. 782 void overlapValueProfData(uint32_t ValueKind, InstrProfRecord &Src, 783 OverlapStats &Overlap, 784 OverlapStats &FuncLevelOverlap); 785 786 private: 787 struct ValueProfData { 788 std::vector<InstrProfValueSiteRecord> IndirectCallSites; 789 std::vector<InstrProfValueSiteRecord> MemOPSizes; 790 }; 791 std::unique_ptr<ValueProfData> ValueData; 792 793 MutableArrayRef<InstrProfValueSiteRecord> 794 getValueSitesForKind(uint32_t ValueKind) { 795 // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever 796 // implemented in LLVM) to call the const overload of this function, then 797 // cast away the constness from the result. 798 auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind( 799 ValueKind); 800 return makeMutableArrayRef( 801 const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size()); 802 } 803 ArrayRef<InstrProfValueSiteRecord> 804 getValueSitesForKind(uint32_t ValueKind) const { 805 if (!ValueData) 806 return None; 807 switch (ValueKind) { 808 case IPVK_IndirectCallTarget: 809 return ValueData->IndirectCallSites; 810 case IPVK_MemOPSize: 811 return ValueData->MemOPSizes; 812 default: 813 llvm_unreachable("Unknown value kind!"); 814 } 815 } 816 817 std::vector<InstrProfValueSiteRecord> & 818 getOrCreateValueSitesForKind(uint32_t ValueKind) { 819 if (!ValueData) 820 ValueData = std::make_unique<ValueProfData>(); 821 switch (ValueKind) { 822 case IPVK_IndirectCallTarget: 823 return ValueData->IndirectCallSites; 824 case IPVK_MemOPSize: 825 return ValueData->MemOPSizes; 826 default: 827 llvm_unreachable("Unknown value kind!"); 828 } 829 } 830 831 // Map indirect call target name hash to name string. 
832 uint64_t remapValue(uint64_t Value, uint32_t ValueKind, 833 InstrProfSymtab *SymTab); 834 835 // Merge Value Profile data from Src record to this record for ValueKind. 836 // Scale merged value counts by \p Weight. 837 void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src, 838 uint64_t Weight, 839 function_ref<void(instrprof_error)> Warn); 840 841 // Scale up value profile data count by N (Numerator) / D (Denominator). 842 void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D, 843 function_ref<void(instrprof_error)> Warn); 844 }; 845 846 struct NamedInstrProfRecord : InstrProfRecord { 847 StringRef Name; 848 uint64_t Hash; 849 850 // We reserve this bit as the flag for context sensitive profile record. 851 static const int CS_FLAG_IN_FUNC_HASH = 60; 852 853 NamedInstrProfRecord() = default; 854 NamedInstrProfRecord(StringRef Name, uint64_t Hash, 855 std::vector<uint64_t> Counts) 856 : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} 857 858 static bool hasCSFlagInHash(uint64_t FuncHash) { 859 return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); 860 } 861 static void setCSFlagInHash(uint64_t &FuncHash) { 862 FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH); 863 } 864 }; 865 866 uint32_t InstrProfRecord::getNumValueKinds() const { 867 uint32_t NumValueKinds = 0; 868 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 869 NumValueKinds += !(getValueSitesForKind(Kind).empty()); 870 return NumValueKinds; 871 } 872 873 uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { 874 uint32_t N = 0; 875 for (auto &SR : getValueSitesForKind(ValueKind)) 876 N += SR.ValueData.size(); 877 return N; 878 } 879 880 uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { 881 return getValueSitesForKind(ValueKind).size(); 882 } 883 884 uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, 885 uint32_t Site) const { 886 return getValueSitesForKind(ValueKind)[Site].ValueData.size(); 887 } 888 889 
std::unique_ptr<InstrProfValueData[]> 890 InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site, 891 uint64_t *TotalC) const { 892 uint64_t Dummy = 0; 893 uint64_t &TotalCount = (TotalC == nullptr ? Dummy : *TotalC); 894 uint32_t N = getNumValueDataForSite(ValueKind, Site); 895 if (N == 0) { 896 TotalCount = 0; 897 return std::unique_ptr<InstrProfValueData[]>(nullptr); 898 } 899 900 auto VD = std::make_unique<InstrProfValueData[]>(N); 901 TotalCount = getValueForSite(VD.get(), ValueKind, Site); 902 903 return VD; 904 } 905 906 uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[], 907 uint32_t ValueKind, 908 uint32_t Site) const { 909 uint32_t I = 0; 910 uint64_t TotalCount = 0; 911 for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { 912 Dest[I].Value = V.Value; 913 Dest[I].Count = V.Count; 914 TotalCount = SaturatingAdd(TotalCount, V.Count); 915 I++; 916 } 917 return TotalCount; 918 } 919 920 void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { 921 if (!NumValueSites) 922 return; 923 getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites); 924 } 925 926 inline support::endianness getHostEndianness() { 927 return sys::IsLittleEndianHost ? 
support::little : support::big; 928 } 929 930 // Include definitions for value profile data 931 #define INSTR_PROF_VALUE_PROF_DATA 932 #include "llvm/ProfileData/InstrProfData.inc" 933 934 void InstrProfValueSiteRecord::sortByCount() { 935 ValueData.sort( 936 [](const InstrProfValueData &left, const InstrProfValueData &right) { 937 return left.Count > right.Count; 938 }); 939 // Now truncate 940 size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 941 if (ValueData.size() > max_s) 942 ValueData.resize(max_s); 943 } 944 945 namespace IndexedInstrProf { 946 947 enum class HashT : uint32_t { 948 MD5, 949 Last = MD5 950 }; 951 952 inline uint64_t ComputeHash(HashT Type, StringRef K) { 953 switch (Type) { 954 case HashT::MD5: 955 return MD5Hash(K); 956 } 957 llvm_unreachable("Unhandled hash type"); 958 } 959 960 const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" 961 962 enum ProfVersion { 963 // Version 1 is the first version. In this version, the value of 964 // a key/value pair can only include profile data of a single function. 965 // Due to this restriction, the number of block counters for a given 966 // function is not recorded but derived from the length of the value. 967 Version1 = 1, 968 // The version 2 format supports recording profile data of multiple 969 // functions which share the same key in one value field. To support this, 970 // the number block counters is recorded as an uint64_t field right after the 971 // function structural hash. 972 Version2 = 2, 973 // Version 3 supports value profile data. The value profile data is expected 974 // to follow the block counter profile data. 975 Version3 = 3, 976 // In this version, profile summary data \c IndexedInstrProf::Summary is 977 // stored after the profile header. 978 Version4 = 4, 979 // In this version, the frontend PGO stable hash algorithm defaults to V2. 
980 Version5 = 5, 981 // In this version, the frontend PGO stable hash algorithm got fixed and 982 // may produce hashes different from Version5. 983 Version6 = 6, 984 // An additional counter is added around logical operators. 985 Version7 = 7, 986 // The current version is 7. 987 CurrentVersion = INSTR_PROF_INDEX_VERSION 988 }; 989 const uint64_t Version = ProfVersion::CurrentVersion; 990 991 const HashT HashType = HashT::MD5; 992 993 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } 994 995 // This structure defines the file header of the LLVM profile 996 // data file in indexed-format. 997 struct Header { 998 uint64_t Magic; 999 uint64_t Version; 1000 uint64_t Unused; // Becomes unused since version 4 1001 uint64_t HashType; 1002 uint64_t HashOffset; 1003 }; 1004 1005 // Profile summary data recorded in the profile data file in indexed 1006 // format. It is introduced in version 4. The summary data follows 1007 // right after the profile file header. 1008 struct Summary { 1009 struct Entry { 1010 uint64_t Cutoff; ///< The required percentile of total execution count. 1011 uint64_t 1012 MinBlockCount; ///< The minimum execution count for this percentile. 1013 uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count. 1014 }; 1015 // The field kind enumerator to assigned value mapping should remain 1016 // unchanged when a new kind is added or an old kind gets deleted in 1017 // the future. 1018 enum SummaryFieldKind { 1019 /// The total number of functions instrumented. 1020 TotalNumFunctions = 0, 1021 /// Total number of instrumented blocks/edges. 1022 TotalNumBlocks = 1, 1023 /// The maximal execution count among all functions. 1024 /// This field does not exist for profile data from IR based 1025 /// instrumentation. 1026 MaxFunctionCount = 2, 1027 /// Max block count of the program. 1028 MaxBlockCount = 3, 1029 /// Max internal block count of the program (excluding entry blocks). 
1030 MaxInternalBlockCount = 4, 1031 /// The sum of all instrumented block counts. 1032 TotalBlockCount = 5, 1033 NumKinds = TotalBlockCount + 1 1034 }; 1035 1036 // The number of summmary fields following the summary header. 1037 uint64_t NumSummaryFields; 1038 // The number of Cutoff Entries (Summary::Entry) following summary fields. 1039 uint64_t NumCutoffEntries; 1040 1041 Summary() = delete; 1042 Summary(uint32_t Size) { memset(this, 0, Size); } 1043 1044 void operator delete(void *ptr) { ::operator delete(ptr); } 1045 1046 static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) { 1047 return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) + 1048 NumSumFields * sizeof(uint64_t); 1049 } 1050 1051 const uint64_t *getSummaryDataBase() const { 1052 return reinterpret_cast<const uint64_t *>(this + 1); 1053 } 1054 1055 uint64_t *getSummaryDataBase() { 1056 return reinterpret_cast<uint64_t *>(this + 1); 1057 } 1058 1059 const Entry *getCutoffEntryBase() const { 1060 return reinterpret_cast<const Entry *>( 1061 &getSummaryDataBase()[NumSummaryFields]); 1062 } 1063 1064 Entry *getCutoffEntryBase() { 1065 return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]); 1066 } 1067 1068 uint64_t get(SummaryFieldKind K) const { 1069 return getSummaryDataBase()[K]; 1070 } 1071 1072 void set(SummaryFieldKind K, uint64_t V) { 1073 getSummaryDataBase()[K] = V; 1074 } 1075 1076 const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; } 1077 1078 void setEntry(uint32_t I, const ProfileSummaryEntry &E) { 1079 Entry &ER = getCutoffEntryBase()[I]; 1080 ER.Cutoff = E.Cutoff; 1081 ER.MinBlockCount = E.MinCount; 1082 ER.NumBlocks = E.NumCounts; 1083 } 1084 }; 1085 1086 inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) { 1087 return std::unique_ptr<Summary>(new (::operator new(TotalSize)) 1088 Summary(TotalSize)); 1089 } 1090 1091 } // end namespace IndexedInstrProf 1092 1093 namespace RawInstrProf { 1094 1095 // Version 
1: First version 1096 // Version 2: Added value profile data section. Per-function control data 1097 // struct has more fields to describe value profile information. 1098 // Version 3: Compressed name section support. Function PGO name reference 1099 // from control data struct is changed from raw pointer to Name's MD5 value. 1100 // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the 1101 // raw header. 1102 // Version 5: Bit 60 of FuncHash is reserved for the flag for the context 1103 // sensitive records. 1104 const uint64_t Version = INSTR_PROF_RAW_VERSION; 1105 1106 template <class IntPtrT> inline uint64_t getMagic(); 1107 template <> inline uint64_t getMagic<uint64_t>() { 1108 return INSTR_PROF_RAW_MAGIC_64; 1109 } 1110 1111 template <> inline uint64_t getMagic<uint32_t>() { 1112 return INSTR_PROF_RAW_MAGIC_32; 1113 } 1114 1115 // Per-function profile data header/control structure. 1116 // The definition should match the structure defined in 1117 // compiler-rt/lib/profile/InstrProfiling.h. 1118 // It should also match the synthesized type in 1119 // Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. 1120 template <class IntPtrT> struct alignas(8) ProfileData { 1121 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; 1122 #include "llvm/ProfileData/InstrProfData.inc" 1123 }; 1124 1125 // File header structure of the LLVM profile data in raw format. 1126 // The definition should match the header referenced in 1127 // compiler-rt/lib/profile/InstrProfilingFile.c and 1128 // InstrProfilingBuffer.c. 1129 struct Header { 1130 #define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name; 1131 #include "llvm/ProfileData/InstrProfData.inc" 1132 }; 1133 1134 } // end namespace RawInstrProf 1135 1136 // Parse MemOP Size range option. 
1137 void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, 1138 int64_t &RangeLast); 1139 1140 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 1141 // aware this is an ir_level profile so it can set the version flag. 1142 void createIRLevelProfileFlagVar(Module &M, bool IsCS, 1143 bool InstrEntryBBEnabled); 1144 1145 // Create the variable for the profile file name. 1146 void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); 1147 1148 // Whether to compress function names in profile records, and filenames in 1149 // code coverage mappings. Used by the Instrumentation library and unit tests. 1150 extern cl::opt<bool> DoInstrProfNameCompression; 1151 1152 } // end namespace llvm 1153 #endif // LLVM_PROFILEDATA_INSTRPROF_H 1154