1 //===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Instrumentation-based profiling data is generated by instrumented 10 // binaries through library functions in compiler-rt, and read by the clang 11 // frontend to feed PGO. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_PROFILEDATA_INSTRPROF_H 16 #define LLVM_PROFILEDATA_INSTRPROF_H 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Triple.h" 23 #include "llvm/IR/GlobalValue.h" 24 #include "llvm/IR/ProfileSummary.h" 25 #include "llvm/ProfileData/InstrProfData.inc" 26 #include "llvm/Support/Compiler.h" 27 #include "llvm/Support/Endian.h" 28 #include "llvm/Support/Error.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/Host.h" 31 #include "llvm/Support/MD5.h" 32 #include "llvm/Support/MathExtras.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include <algorithm> 35 #include <cassert> 36 #include <cstddef> 37 #include <cstdint> 38 #include <cstring> 39 #include <list> 40 #include <memory> 41 #include <string> 42 #include <system_error> 43 #include <utility> 44 #include <vector> 45 46 namespace llvm { 47 48 class Function; 49 class GlobalVariable; 50 struct InstrProfRecord; 51 class InstrProfSymtab; 52 class Instruction; 53 class MDNode; 54 class Module; 55 56 enum InstrProfSectKind { 57 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind, 58 #include "llvm/ProfileData/InstrProfData.inc" 59 }; 60 61 /// Return the name of the profile section corresponding to \p IPSK. 62 /// 63 /// The name of the section depends on the object format type \p OF. If 64 /// \p AddSegmentInfo is true, a segment prefix and additional linker hints may 65 /// be added to the section name (this is the default). 66 std::string getInstrProfSectionName(InstrProfSectKind IPSK, 67 Triple::ObjectFormatType OF, 68 bool AddSegmentInfo = true); 69 70 /// Return the name profile runtime entry point to do value profiling 71 /// for a given site. 72 inline StringRef getInstrProfValueProfFuncName() { 73 return INSTR_PROF_VALUE_PROF_FUNC_STR; 74 } 75 76 /// Return the name profile runtime entry point to do value range profiling. 77 inline StringRef getInstrProfValueRangeProfFuncName() { 78 return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; 79 } 80 81 /// Return the name prefix of variables containing instrumented function names. 82 inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } 83 84 /// Return the name prefix of variables containing per-function control data. 85 inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } 86 87 /// Return the name prefix of profile counter variables. 88 inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } 89 90 /// Return the name prefix of value profile variables. 91 inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } 92 93 /// Return the name of value profile node array variables: 94 inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } 95 96 /// Return the name prefix of the COMDAT group for instrumentation variables 97 /// associated with a COMDAT function. 98 inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } 99 100 /// Return the name of the variable holding the strings (possibly compressed) 101 /// of all function's PGO names. 102 inline StringRef getInstrProfNamesVarName() { 103 return "__llvm_prf_nm"; 104 } 105 106 /// Return the name of a covarage mapping variable (internal linkage) 107 /// for each instrumented source module. Such variables are allocated 108 /// in the __llvm_covmap section. 109 inline StringRef getCoverageMappingVarName() { 110 return "__llvm_coverage_mapping"; 111 } 112 113 /// Return the name of the internal variable recording the array 114 /// of PGO name vars referenced by the coverage mapping. The owning 115 /// functions of those names are not emitted by FE (e.g, unused inline 116 /// functions.) 117 inline StringRef getCoverageUnusedNamesVarName() { 118 return "__llvm_coverage_names"; 119 } 120 121 /// Return the name of function that registers all the per-function control 122 /// data at program startup time by calling __llvm_register_function. This 123 /// function has internal linkage and is called by __llvm_profile_init 124 /// runtime method. This function is not generated for these platforms: 125 /// Darwin, Linux, and FreeBSD. 126 inline StringRef getInstrProfRegFuncsName() { 127 return "__llvm_profile_register_functions"; 128 } 129 130 /// Return the name of the runtime interface that registers per-function control 131 /// data for one instrumented function. 132 inline StringRef getInstrProfRegFuncName() { 133 return "__llvm_profile_register_function"; 134 } 135 136 /// Return the name of the runtime interface that registers the PGO name strings. 137 inline StringRef getInstrProfNamesRegFuncName() { 138 return "__llvm_profile_register_names_function"; 139 } 140 141 /// Return the name of the runtime initialization method that is generated by 142 /// the compiler. The function calls __llvm_profile_register_functions and 143 /// __llvm_profile_override_default_filename functions if needed. This function 144 /// has internal linkage and invoked at startup time via init_array. 145 inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; } 146 147 /// Return the name of the hook variable defined in profile runtime library. 148 /// A reference to the variable causes the linker to link in the runtime 149 /// initialization module (which defines the hook variable). 150 inline StringRef getInstrProfRuntimeHookVarName() { 151 return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR); 152 } 153 154 /// Return the name of the compiler generated function that references the 155 /// runtime hook variable. The function is a weak global. 156 inline StringRef getInstrProfRuntimeHookVarUseFuncName() { 157 return "__llvm_profile_runtime_user"; 158 } 159 160 /// Return the marker used to separate PGO names during serialization. 161 inline StringRef getInstrProfNameSeparator() { return "\01"; } 162 163 /// Return the modified name for function \c F suitable to be 164 /// used the key for profile lookup. Variable \c InLTO indicates if this 165 /// is called in LTO optimization passes. 166 std::string getPGOFuncName(const Function &F, bool InLTO = false, 167 uint64_t Version = INSTR_PROF_INDEX_VERSION); 168 169 /// Return the modified name for a function suitable to be 170 /// used the key for profile lookup. The function's original 171 /// name is \c RawFuncName and has linkage of type \c Linkage. 172 /// The function is defined in module \c FileName. 173 std::string getPGOFuncName(StringRef RawFuncName, 174 GlobalValue::LinkageTypes Linkage, 175 StringRef FileName, 176 uint64_t Version = INSTR_PROF_INDEX_VERSION); 177 178 /// Return the name of the global variable used to store a function 179 /// name in PGO instrumentation. \c FuncName is the name of the function 180 /// returned by the \c getPGOFuncName call. 181 std::string getPGOFuncNameVarName(StringRef FuncName, 182 GlobalValue::LinkageTypes Linkage); 183 184 /// Create and return the global variable for function name used in PGO 185 /// instrumentation. \c FuncName is the name of the function returned 186 /// by \c getPGOFuncName call. 187 GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName); 188 189 /// Create and return the global variable for function name used in PGO 190 /// instrumentation. /// \c FuncName is the name of the function 191 /// returned by \c getPGOFuncName call, \c M is the owning module, 192 /// and \c Linkage is the linkage of the instrumented function. 193 GlobalVariable *createPGOFuncNameVar(Module &M, 194 GlobalValue::LinkageTypes Linkage, 195 StringRef PGOFuncName); 196 197 /// Return the initializer in string of the PGO name var \c NameVar. 198 StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar); 199 200 /// Given a PGO function name, remove the filename prefix and return 201 /// the original (static) function name. 202 StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, 203 StringRef FileName = "<unknown>"); 204 205 /// Given a vector of strings (function PGO names) \c NameStrs, the 206 /// method generates a combined string \c Result thatis ready to be 207 /// serialized. The \c Result string is comprised of three fields: 208 /// The first field is the legnth of the uncompressed strings, and the 209 /// the second field is the length of the zlib-compressed string. 210 /// Both fields are encoded in ULEB128. If \c doCompress is false, the 211 /// third field is the uncompressed strings; otherwise it is the 212 /// compressed string. When the string compression is off, the 213 /// second field will have value zero. 214 Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs, 215 bool doCompression, std::string &Result); 216 217 /// Produce \c Result string with the same format described above. The input 218 /// is vector of PGO function name variables that are referenced. 219 Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars, 220 std::string &Result, bool doCompression = true); 221 222 /// \c NameStrings is a string composed of one of more sub-strings encoded in 223 /// the format described above. The substrings are separated by 0 or more zero 224 /// bytes. This method decodes the string and populates the \c Symtab. 225 Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab); 226 227 /// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being 228 /// set in IR PGO compilation. 229 bool isIRPGOFlagSet(const Module *M); 230 231 /// Check if we can safely rename this Comdat function. Instances of the same 232 /// comdat function may have different control flows thus can not share the 233 /// same counter variable. 234 bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false); 235 236 enum InstrProfValueKind : uint32_t { 237 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value, 238 #include "llvm/ProfileData/InstrProfData.inc" 239 }; 240 241 /// Get the value profile data for value site \p SiteIdx from \p InstrProfR 242 /// and annotate the instruction \p Inst with the value profile meta data. 243 /// Annotate up to \p MaxMDCount (default 3) number of records per value site. 244 void annotateValueSite(Module &M, Instruction &Inst, 245 const InstrProfRecord &InstrProfR, 246 InstrProfValueKind ValueKind, uint32_t SiteIndx, 247 uint32_t MaxMDCount = 3); 248 249 /// Same as the above interface but using an ArrayRef, as well as \p Sum. 250 void annotateValueSite(Module &M, Instruction &Inst, 251 ArrayRef<InstrProfValueData> VDs, uint64_t Sum, 252 InstrProfValueKind ValueKind, uint32_t MaxMDCount); 253 254 /// Extract the value profile data from \p Inst which is annotated with 255 /// value profile meta data. Return false if there is no value data annotated, 256 /// otherwise return true. 257 bool getValueProfDataFromInst(const Instruction &Inst, 258 InstrProfValueKind ValueKind, 259 uint32_t MaxNumValueData, 260 InstrProfValueData ValueData[], 261 uint32_t &ActualNumValueData, uint64_t &TotalC); 262 263 inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } 264 265 /// Return the PGOFuncName meta data associated with a function. 266 MDNode *getPGOFuncNameMetadata(const Function &F); 267 268 /// Create the PGOFuncName meta data if PGOFuncName is different from 269 /// function's raw name. This should only apply to internal linkage functions 270 /// declared by users only. 271 void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); 272 273 /// Check if we can use Comdat for profile variables. This will eliminate 274 /// the duplicated profile variables for Comdat functions. 275 bool needsComdatForCounter(const Function &F, const Module &M); 276 277 const std::error_category &instrprof_category(); 278 279 enum class instrprof_error { 280 success = 0, 281 eof, 282 unrecognized_format, 283 bad_magic, 284 bad_header, 285 unsupported_version, 286 unsupported_hash_type, 287 too_large, 288 truncated, 289 malformed, 290 unknown_function, 291 hash_mismatch, 292 count_mismatch, 293 counter_overflow, 294 value_site_count_mismatch, 295 compress_failed, 296 uncompress_failed, 297 empty_raw_profile, 298 zlib_unavailable 299 }; 300 301 inline std::error_code make_error_code(instrprof_error E) { 302 return std::error_code(static_cast<int>(E), instrprof_category()); 303 } 304 305 class InstrProfError : public ErrorInfo<InstrProfError> { 306 public: 307 InstrProfError(instrprof_error Err) : Err(Err) { 308 assert(Err != instrprof_error::success && "Not an error"); 309 } 310 311 std::string message() const override; 312 313 void log(raw_ostream &OS) const override { OS << message(); } 314 315 std::error_code convertToErrorCode() const override { 316 return make_error_code(Err); 317 } 318 319 instrprof_error get() const { return Err; } 320 321 /// Consume an Error and return the raw enum value contained within it. The 322 /// Error must either be a success value, or contain a single InstrProfError. 323 static instrprof_error take(Error E) { 324 auto Err = instrprof_error::success; 325 handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) { 326 assert(Err == instrprof_error::success && "Multiple errors encountered"); 327 Err = IPE.get(); 328 }); 329 return Err; 330 } 331 332 static char ID; 333 334 private: 335 instrprof_error Err; 336 }; 337 338 class SoftInstrProfErrors { 339 /// Count the number of soft instrprof_errors encountered and keep track of 340 /// the first such error for reporting purposes. 341 342 /// The first soft error encountered. 343 instrprof_error FirstError = instrprof_error::success; 344 345 /// The number of hash mismatches. 346 unsigned NumHashMismatches = 0; 347 348 /// The number of count mismatches. 349 unsigned NumCountMismatches = 0; 350 351 /// The number of counter overflows. 352 unsigned NumCounterOverflows = 0; 353 354 /// The number of value site count mismatches. 355 unsigned NumValueSiteCountMismatches = 0; 356 357 public: 358 SoftInstrProfErrors() = default; 359 360 ~SoftInstrProfErrors() { 361 assert(FirstError == instrprof_error::success && 362 "Unchecked soft error encountered"); 363 } 364 365 /// Track a soft error (\p IE) and increment its associated counter. 366 void addError(instrprof_error IE); 367 368 /// Get the number of hash mismatches. 369 unsigned getNumHashMismatches() const { return NumHashMismatches; } 370 371 /// Get the number of count mismatches. 372 unsigned getNumCountMismatches() const { return NumCountMismatches; } 373 374 /// Get the number of counter overflows. 375 unsigned getNumCounterOverflows() const { return NumCounterOverflows; } 376 377 /// Get the number of value site count mismatches. 378 unsigned getNumValueSiteCountMismatches() const { 379 return NumValueSiteCountMismatches; 380 } 381 382 /// Return the first encountered error and reset FirstError to a success 383 /// value. 384 Error takeError() { 385 if (FirstError == instrprof_error::success) 386 return Error::success(); 387 auto E = make_error<InstrProfError>(FirstError); 388 FirstError = instrprof_error::success; 389 return E; 390 } 391 }; 392 393 namespace object { 394 395 class SectionRef; 396 397 } // end namespace object 398 399 namespace IndexedInstrProf { 400 401 uint64_t ComputeHash(StringRef K); 402 403 } // end namespace IndexedInstrProf 404 405 /// A symbol table used for function PGO name look-up with keys 406 /// (such as pointers, md5hash values) to the function. A function's 407 /// PGO name or name's md5hash are used in retrieving the profile 408 /// data of the function. See \c getPGOFuncName() method for details 409 /// on how PGO name is formed. 410 class InstrProfSymtab { 411 public: 412 using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>; 413 414 private: 415 StringRef Data; 416 uint64_t Address = 0; 417 // Unique name strings. 418 StringSet<> NameTab; 419 // A map from MD5 keys to function name strings. 420 std::vector<std::pair<uint64_t, StringRef>> MD5NameMap; 421 // A map from MD5 keys to function define. We only populate this map 422 // when build the Symtab from a Module. 423 std::vector<std::pair<uint64_t, Function *>> MD5FuncMap; 424 // A map from function runtime address to function name MD5 hash. 425 // This map is only populated and used by raw instr profile reader. 426 AddrHashMap AddrToMD5Map; 427 bool Sorted = false; 428 429 static StringRef getExternalSymbol() { 430 return "** External Symbol **"; 431 } 432 433 // If the symtab is created by a series of calls to \c addFuncName, \c 434 // finalizeSymtab needs to be called before looking up function names. 435 // This is required because the underlying map is a vector (for space 436 // efficiency) which needs to be sorted. 437 inline void finalizeSymtab(); 438 439 public: 440 InstrProfSymtab() = default; 441 442 /// Create InstrProfSymtab from an object file section which 443 /// contains function PGO names. When section may contain raw 444 /// string data or string data in compressed form. This method 445 /// only initialize the symtab with reference to the data and 446 /// the section base address. The decompression will be delayed 447 /// until before it is used. See also \c create(StringRef) method. 448 Error create(object::SectionRef &Section); 449 450 /// This interface is used by reader of CoverageMapping test 451 /// format. 452 inline Error create(StringRef D, uint64_t BaseAddr); 453 454 /// \c NameStrings is a string composed of one of more sub-strings 455 /// encoded in the format described in \c collectPGOFuncNameStrings. 456 /// This method is a wrapper to \c readPGOFuncNameStrings method. 457 inline Error create(StringRef NameStrings); 458 459 /// A wrapper interface to populate the PGO symtab with functions 460 /// decls from module \c M. This interface is used by transformation 461 /// passes such as indirect function call promotion. Variable \c InLTO 462 /// indicates if this is called from LTO optimization passes. 463 Error create(Module &M, bool InLTO = false); 464 465 /// Create InstrProfSymtab from a set of names iteratable from 466 /// \p IterRange. This interface is used by IndexedProfReader. 467 template <typename NameIterRange> Error create(const NameIterRange &IterRange); 468 469 /// Update the symtab by adding \p FuncName to the table. This interface 470 /// is used by the raw and text profile readers. 471 Error addFuncName(StringRef FuncName) { 472 if (FuncName.empty()) 473 return make_error<InstrProfError>(instrprof_error::malformed); 474 auto Ins = NameTab.insert(FuncName); 475 if (Ins.second) { 476 MD5NameMap.push_back(std::make_pair( 477 IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); 478 Sorted = false; 479 } 480 return Error::success(); 481 } 482 483 /// Map a function address to its name's MD5 hash. This interface 484 /// is only used by the raw profiler reader. 485 void mapAddress(uint64_t Addr, uint64_t MD5Val) { 486 AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); 487 } 488 489 /// Return a function's hash, or 0, if the function isn't in this SymTab. 490 uint64_t getFunctionHashFromAddress(uint64_t Address); 491 492 /// Return function's PGO name from the function name's symbol 493 /// address in the object file. If an error occurs, return 494 /// an empty string. 495 StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); 496 497 /// Return function's PGO name from the name's md5 hash value. 498 /// If not found, return an empty string. 499 inline StringRef getFuncName(uint64_t FuncMD5Hash); 500 501 /// Just like getFuncName, except that it will return a non-empty StringRef 502 /// if the function is external to this symbol table. All such cases 503 /// will be represented using the same StringRef value. 504 inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash); 505 506 /// True if Symbol is the value used to represent external symbols. 507 static bool isExternalSymbol(const StringRef &Symbol) { 508 return Symbol == InstrProfSymtab::getExternalSymbol(); 509 } 510 511 /// Return function from the name's md5 hash. Return nullptr if not found. 512 inline Function *getFunction(uint64_t FuncMD5Hash); 513 514 /// Return the function's original assembly name by stripping off 515 /// the prefix attached (to symbols with priviate linkage). For 516 /// global functions, it returns the same string as getFuncName. 517 inline StringRef getOrigFuncName(uint64_t FuncMD5Hash); 518 519 /// Return the name section data. 520 inline StringRef getNameData() const { return Data; } 521 }; 522 523 Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { 524 Data = D; 525 Address = BaseAddr; 526 return Error::success(); 527 } 528 529 Error InstrProfSymtab::create(StringRef NameStrings) { 530 return readPGOFuncNameStrings(NameStrings, *this); 531 } 532 533 template <typename NameIterRange> 534 Error InstrProfSymtab::create(const NameIterRange &IterRange) { 535 for (auto Name : IterRange) 536 if (Error E = addFuncName(Name)) 537 return E; 538 539 finalizeSymtab(); 540 return Error::success(); 541 } 542 543 void InstrProfSymtab::finalizeSymtab() { 544 if (Sorted) 545 return; 546 llvm::sort(MD5NameMap, less_first()); 547 llvm::sort(MD5FuncMap, less_first()); 548 llvm::sort(AddrToMD5Map, less_first()); 549 AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), 550 AddrToMD5Map.end()); 551 Sorted = true; 552 } 553 554 StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) { 555 StringRef ret = getFuncName(FuncMD5Hash); 556 if (ret.empty()) 557 return InstrProfSymtab::getExternalSymbol(); 558 return ret; 559 } 560 561 StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { 562 finalizeSymtab(); 563 auto Result = 564 std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash, 565 [](const std::pair<uint64_t, std::string> &LHS, 566 uint64_t RHS) { return LHS.first < RHS; }); 567 if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash) 568 return Result->second; 569 return StringRef(); 570 } 571 572 Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { 573 finalizeSymtab(); 574 auto Result = 575 std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash, 576 [](const std::pair<uint64_t, Function*> &LHS, 577 uint64_t RHS) { return LHS.first < RHS; }); 578 if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash) 579 return Result->second; 580 return nullptr; 581 } 582 583 // See also getPGOFuncName implementation. These two need to be 584 // matched. 585 StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) { 586 StringRef PGOName = getFuncName(FuncMD5Hash); 587 size_t S = PGOName.find_first_of(':'); 588 if (S == StringRef::npos) 589 return PGOName; 590 return PGOName.drop_front(S + 1); 591 } 592 593 // To store the sums of profile count values, or the percentage of 594 // the sums of the total count values. 595 struct CountSumOrPercent { 596 uint64_t NumEntries; 597 double CountSum; 598 double ValueCounts[IPVK_Last - IPVK_First + 1]; 599 CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {} 600 void reset() { 601 NumEntries = 0; 602 CountSum = 0.0f; 603 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) 604 ValueCounts[I] = 0.0f; 605 } 606 }; 607 608 // Function level or program level overlap information. 609 struct OverlapStats { 610 enum OverlapStatsLevel { ProgramLevel, FunctionLevel }; 611 // Sum of the total count values for the base profile. 612 CountSumOrPercent Base; 613 // Sum of the total count values for the test profile. 614 CountSumOrPercent Test; 615 // Overlap lap score. Should be in range of [0.0f to 1.0f]. 616 CountSumOrPercent Overlap; 617 CountSumOrPercent Mismatch; 618 CountSumOrPercent Unique; 619 OverlapStatsLevel Level; 620 const std::string *BaseFilename; 621 const std::string *TestFilename; 622 StringRef FuncName; 623 uint64_t FuncHash; 624 bool Valid; 625 626 OverlapStats(OverlapStatsLevel L = ProgramLevel) 627 : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0), 628 Valid(false) {} 629 630 void dump(raw_fd_ostream &OS) const; 631 632 void setFuncInfo(StringRef Name, uint64_t Hash) { 633 FuncName = Name; 634 FuncHash = Hash; 635 } 636 637 Error accumuateCounts(const std::string &BaseFilename, 638 const std::string &TestFilename, bool IsCS); 639 void addOneMismatch(const CountSumOrPercent &MismatchFunc); 640 void addOneUnique(const CountSumOrPercent &UniqueFunc); 641 642 static inline double score(uint64_t Val1, uint64_t Val2, double Sum1, 643 double Sum2) { 644 if (Sum1 < 1.0f || Sum2 < 1.0f) 645 return 0.0f; 646 return std::min(Val1 / Sum1, Val2 / Sum2); 647 } 648 }; 649 650 // This is used to filter the functions whose overlap information 651 // to be output. 652 struct OverlapFuncFilters { 653 uint64_t ValueCutoff; 654 const std::string NameFilter; 655 }; 656 657 struct InstrProfValueSiteRecord { 658 /// Value profiling data pairs at a given value site. 659 std::list<InstrProfValueData> ValueData; 660 661 InstrProfValueSiteRecord() { ValueData.clear(); } 662 template <class InputIterator> 663 InstrProfValueSiteRecord(InputIterator F, InputIterator L) 664 : ValueData(F, L) {} 665 666 /// Sort ValueData ascending by Value 667 void sortByTargetValues() { 668 ValueData.sort( 669 [](const InstrProfValueData &left, const InstrProfValueData &right) { 670 return left.Value < right.Value; 671 }); 672 } 673 /// Sort ValueData Descending by Count 674 inline void sortByCount(); 675 676 /// Merge data from another InstrProfValueSiteRecord 677 /// Optionally scale merged counts by \p Weight. 678 void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, 679 function_ref<void(instrprof_error)> Warn); 680 /// Scale up value profile data counts. 681 void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn); 682 683 /// Compute the overlap b/w this record and Input record. 684 void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind, 685 OverlapStats &Overlap, OverlapStats &FuncLevelOverlap); 686 }; 687 688 /// Profiling information for a single function. 689 struct InstrProfRecord { 690 std::vector<uint64_t> Counts; 691 692 InstrProfRecord() = default; 693 InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {} 694 InstrProfRecord(InstrProfRecord &&) = default; 695 InstrProfRecord(const InstrProfRecord &RHS) 696 : Counts(RHS.Counts), 697 ValueData(RHS.ValueData 698 ? llvm::make_unique<ValueProfData>(*RHS.ValueData) 699 : nullptr) {} 700 InstrProfRecord &operator=(InstrProfRecord &&) = default; 701 InstrProfRecord &operator=(const InstrProfRecord &RHS) { 702 Counts = RHS.Counts; 703 if (!RHS.ValueData) { 704 ValueData = nullptr; 705 return *this; 706 } 707 if (!ValueData) 708 ValueData = llvm::make_unique<ValueProfData>(*RHS.ValueData); 709 else 710 *ValueData = *RHS.ValueData; 711 return *this; 712 } 713 714 /// Return the number of value profile kinds with non-zero number 715 /// of profile sites. 716 inline uint32_t getNumValueKinds() const; 717 /// Return the number of instrumented sites for ValueKind. 718 inline uint32_t getNumValueSites(uint32_t ValueKind) const; 719 720 /// Return the total number of ValueData for ValueKind. 721 inline uint32_t getNumValueData(uint32_t ValueKind) const; 722 723 /// Return the number of value data collected for ValueKind at profiling 724 /// site: Site. 725 inline uint32_t getNumValueDataForSite(uint32_t ValueKind, 726 uint32_t Site) const; 727 728 /// Return the array of profiled values at \p Site. If \p TotalC 729 /// is not null, the total count of all target values at this site 730 /// will be stored in \c *TotalC. 731 inline std::unique_ptr<InstrProfValueData[]> 732 getValueForSite(uint32_t ValueKind, uint32_t Site, 733 uint64_t *TotalC = nullptr) const; 734 735 /// Get the target value/counts of kind \p ValueKind collected at site 736 /// \p Site and store the result in array \p Dest. Return the total 737 /// counts of all target values at this site. 738 inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, 739 uint32_t Site) const; 740 741 /// Reserve space for NumValueSites sites. 742 inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); 743 744 /// Add ValueData for ValueKind at value Site. 745 void addValueData(uint32_t ValueKind, uint32_t Site, 746 InstrProfValueData *VData, uint32_t N, 747 InstrProfSymtab *SymTab); 748 749 /// Merge the counts in \p Other into this one. 750 /// Optionally scale merged counts by \p Weight. 751 void merge(InstrProfRecord &Other, uint64_t Weight, 752 function_ref<void(instrprof_error)> Warn); 753 754 /// Scale up profile counts (including value profile data) by 755 /// \p Weight. 756 void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn); 757 758 /// Sort value profile data (per site) by count. 759 void sortValueData() { 760 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 761 for (auto &SR : getValueSitesForKind(Kind)) 762 SR.sortByCount(); 763 } 764 765 /// Clear value data entries and edge counters. 766 void Clear() { 767 Counts.clear(); 768 clearValueData(); 769 } 770 771 /// Clear value data entries 772 void clearValueData() { ValueData = nullptr; } 773 774 /// Compute the sums of all counts and store in Sum. 775 void accumuateCounts(CountSumOrPercent &Sum) const; 776 777 /// Compute the overlap b/w this IntrprofRecord and Other. 778 void overlap(InstrProfRecord &Other, OverlapStats &Overlap, 779 OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff); 780 781 /// Compute the overlap of value profile counts. 782 void overlapValueProfData(uint32_t ValueKind, InstrProfRecord &Src, 783 OverlapStats &Overlap, 784 OverlapStats &FuncLevelOverlap); 785 786 private: 787 struct ValueProfData { 788 std::vector<InstrProfValueSiteRecord> IndirectCallSites; 789 std::vector<InstrProfValueSiteRecord> MemOPSizes; 790 }; 791 std::unique_ptr<ValueProfData> ValueData; 792 793 MutableArrayRef<InstrProfValueSiteRecord> 794 getValueSitesForKind(uint32_t ValueKind) { 795 // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever 796 // implemented in LLVM) to call the const overload of this function, then 797 // cast away the constness from the result. 798 auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind( 799 ValueKind); 800 return makeMutableArrayRef( 801 const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size()); 802 } 803 ArrayRef<InstrProfValueSiteRecord> 804 getValueSitesForKind(uint32_t ValueKind) const { 805 if (!ValueData) 806 return None; 807 switch (ValueKind) { 808 case IPVK_IndirectCallTarget: 809 return ValueData->IndirectCallSites; 810 case IPVK_MemOPSize: 811 return ValueData->MemOPSizes; 812 default: 813 llvm_unreachable("Unknown value kind!"); 814 } 815 } 816 817 std::vector<InstrProfValueSiteRecord> & 818 getOrCreateValueSitesForKind(uint32_t ValueKind) { 819 if (!ValueData) 820 ValueData = llvm::make_unique<ValueProfData>(); 821 switch (ValueKind) { 822 case IPVK_IndirectCallTarget: 823 return ValueData->IndirectCallSites; 824 case IPVK_MemOPSize: 825 return ValueData->MemOPSizes; 826 default: 827 llvm_unreachable("Unknown value kind!"); 828 } 829 } 830 831 // Map indirect call target name hash to name string. 832 uint64_t remapValue(uint64_t Value, uint32_t ValueKind, 833 InstrProfSymtab *SymTab); 834 835 // Merge Value Profile data from Src record to this record for ValueKind. 836 // Scale merged value counts by \p Weight. 837 void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src, 838 uint64_t Weight, 839 function_ref<void(instrprof_error)> Warn); 840 841 // Scale up value profile data count. 842 void scaleValueProfData(uint32_t ValueKind, uint64_t Weight, 843 function_ref<void(instrprof_error)> Warn); 844 }; 845 846 struct NamedInstrProfRecord : InstrProfRecord { 847 StringRef Name; 848 uint64_t Hash; 849 850 // We reserve this bit as the flag for context sensitive profile record. 851 static const int CS_FLAG_IN_FUNC_HASH = 60; 852 853 NamedInstrProfRecord() = default; 854 NamedInstrProfRecord(StringRef Name, uint64_t Hash, 855 std::vector<uint64_t> Counts) 856 : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} 857 858 static bool hasCSFlagInHash(uint64_t FuncHash) { 859 return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); 860 } 861 static void setCSFlagInHash(uint64_t &FuncHash) { 862 FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH); 863 } 864 }; 865 866 uint32_t InstrProfRecord::getNumValueKinds() const { 867 uint32_t NumValueKinds = 0; 868 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 869 NumValueKinds += !(getValueSitesForKind(Kind).empty()); 870 return NumValueKinds; 871 } 872 873 uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { 874 uint32_t N = 0; 875 for (auto &SR : getValueSitesForKind(ValueKind)) 876 N += SR.ValueData.size(); 877 return N; 878 } 879 880 uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { 881 return getValueSitesForKind(ValueKind).size(); 882 } 883 884 uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, 885 uint32_t Site) const { 886 return getValueSitesForKind(ValueKind)[Site].ValueData.size(); 887 } 888 889 std::unique_ptr<InstrProfValueData[]> 890 InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site, 891 uint64_t *TotalC) const { 892 uint64_t Dummy; 893 uint64_t &TotalCount = (TotalC == nullptr ? Dummy : *TotalC); 894 uint32_t N = getNumValueDataForSite(ValueKind, Site); 895 if (N == 0) { 896 TotalCount = 0; 897 return std::unique_ptr<InstrProfValueData[]>(nullptr); 898 } 899 900 auto VD = llvm::make_unique<InstrProfValueData[]>(N); 901 TotalCount = getValueForSite(VD.get(), ValueKind, Site); 902 903 return VD; 904 } 905 906 uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[], 907 uint32_t ValueKind, 908 uint32_t Site) const { 909 uint32_t I = 0; 910 uint64_t TotalCount = 0; 911 for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { 912 Dest[I].Value = V.Value; 913 Dest[I].Count = V.Count; 914 TotalCount = SaturatingAdd(TotalCount, V.Count); 915 I++; 916 } 917 return TotalCount; 918 } 919 920 void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { 921 if (!NumValueSites) 922 return; 923 getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites); 924 } 925 926 inline support::endianness getHostEndianness() { 927 return sys::IsLittleEndianHost ? support::little : support::big; 928 } 929 930 // Include definitions for value profile data 931 #define INSTR_PROF_VALUE_PROF_DATA 932 #include "llvm/ProfileData/InstrProfData.inc" 933 934 void InstrProfValueSiteRecord::sortByCount() { 935 ValueData.sort( 936 [](const InstrProfValueData &left, const InstrProfValueData &right) { 937 return left.Count > right.Count; 938 }); 939 // Now truncate 940 size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 941 if (ValueData.size() > max_s) 942 ValueData.resize(max_s); 943 } 944 945 namespace IndexedInstrProf { 946 947 enum class HashT : uint32_t { 948 MD5, 949 Last = MD5 950 }; 951 952 inline uint64_t ComputeHash(HashT Type, StringRef K) { 953 switch (Type) { 954 case HashT::MD5: 955 return MD5Hash(K); 956 } 957 llvm_unreachable("Unhandled hash type"); 958 } 959 960 const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" 961 962 enum ProfVersion { 963 // Version 1 is the first version. In this version, the value of 964 // a key/value pair can only include profile data of a single function. 965 // Due to this restriction, the number of block counters for a given 966 // function is not recorded but derived from the length of the value. 967 Version1 = 1, 968 // The version 2 format supports recording profile data of multiple 969 // functions which share the same key in one value field. To support this, 970 // the number block counters is recorded as an uint64_t field right after the 971 // function structural hash. 972 Version2 = 2, 973 // Version 3 supports value profile data. The value profile data is expected 974 // to follow the block counter profile data. 975 Version3 = 3, 976 // In this version, profile summary data \c IndexedInstrProf::Summary is 977 // stored after the profile header. 978 Version4 = 4, 979 // In this version, the frontend PGO stable hash algorithm defaults to V2. 980 Version5 = 5, 981 // The current version is 5. 982 CurrentVersion = INSTR_PROF_INDEX_VERSION 983 }; 984 const uint64_t Version = ProfVersion::CurrentVersion; 985 986 const HashT HashType = HashT::MD5; 987 988 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } 989 990 // This structure defines the file header of the LLVM profile 991 // data file in indexed-format. 992 struct Header { 993 uint64_t Magic; 994 uint64_t Version; 995 uint64_t Unused; // Becomes unused since version 4 996 uint64_t HashType; 997 uint64_t HashOffset; 998 }; 999 1000 // Profile summary data recorded in the profile data file in indexed 1001 // format. It is introduced in version 4. The summary data follows 1002 // right after the profile file header. 1003 struct Summary { 1004 struct Entry { 1005 uint64_t Cutoff; ///< The required percentile of total execution count. 1006 uint64_t 1007 MinBlockCount; ///< The minimum execution count for this percentile. 1008 uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count. 1009 }; 1010 // The field kind enumerator to assigned value mapping should remain 1011 // unchanged when a new kind is added or an old kind gets deleted in 1012 // the future. 1013 enum SummaryFieldKind { 1014 /// The total number of functions instrumented. 1015 TotalNumFunctions = 0, 1016 /// Total number of instrumented blocks/edges. 1017 TotalNumBlocks = 1, 1018 /// The maximal execution count among all functions. 1019 /// This field does not exist for profile data from IR based 1020 /// instrumentation. 1021 MaxFunctionCount = 2, 1022 /// Max block count of the program. 1023 MaxBlockCount = 3, 1024 /// Max internal block count of the program (excluding entry blocks). 1025 MaxInternalBlockCount = 4, 1026 /// The sum of all instrumented block counts. 1027 TotalBlockCount = 5, 1028 NumKinds = TotalBlockCount + 1 1029 }; 1030 1031 // The number of summmary fields following the summary header. 1032 uint64_t NumSummaryFields; 1033 // The number of Cutoff Entries (Summary::Entry) following summary fields. 1034 uint64_t NumCutoffEntries; 1035 1036 Summary() = delete; 1037 Summary(uint32_t Size) { memset(this, 0, Size); } 1038 1039 void operator delete(void *ptr) { ::operator delete(ptr); } 1040 1041 static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) { 1042 return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) + 1043 NumSumFields * sizeof(uint64_t); 1044 } 1045 1046 const uint64_t *getSummaryDataBase() const { 1047 return reinterpret_cast<const uint64_t *>(this + 1); 1048 } 1049 1050 uint64_t *getSummaryDataBase() { 1051 return reinterpret_cast<uint64_t *>(this + 1); 1052 } 1053 1054 const Entry *getCutoffEntryBase() const { 1055 return reinterpret_cast<const Entry *>( 1056 &getSummaryDataBase()[NumSummaryFields]); 1057 } 1058 1059 Entry *getCutoffEntryBase() { 1060 return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]); 1061 } 1062 1063 uint64_t get(SummaryFieldKind K) const { 1064 return getSummaryDataBase()[K]; 1065 } 1066 1067 void set(SummaryFieldKind K, uint64_t V) { 1068 getSummaryDataBase()[K] = V; 1069 } 1070 1071 const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; } 1072 1073 void setEntry(uint32_t I, const ProfileSummaryEntry &E) { 1074 Entry &ER = getCutoffEntryBase()[I]; 1075 ER.Cutoff = E.Cutoff; 1076 ER.MinBlockCount = E.MinCount; 1077 ER.NumBlocks = E.NumCounts; 1078 } 1079 }; 1080 1081 inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) { 1082 return std::unique_ptr<Summary>(new (::operator new(TotalSize)) 1083 Summary(TotalSize)); 1084 } 1085 1086 } // end namespace IndexedInstrProf 1087 1088 namespace RawInstrProf { 1089 1090 // Version 1: First version 1091 // Version 2: Added value profile data section. Per-function control data 1092 // struct has more fields to describe value profile information. 1093 // Version 3: Compressed name section support. Function PGO name reference 1094 // from control data struct is changed from raw pointer to Name's MD5 value. 1095 // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the 1096 // raw header. 1097 // Version 5: Bit 60 of FuncHash is reserved for the flag for the context 1098 // sensitive records. 1099 const uint64_t Version = INSTR_PROF_RAW_VERSION; 1100 1101 template <class IntPtrT> inline uint64_t getMagic(); 1102 template <> inline uint64_t getMagic<uint64_t>() { 1103 return INSTR_PROF_RAW_MAGIC_64; 1104 } 1105 1106 template <> inline uint64_t getMagic<uint32_t>() { 1107 return INSTR_PROF_RAW_MAGIC_32; 1108 } 1109 1110 // Per-function profile data header/control structure. 1111 // The definition should match the structure defined in 1112 // compiler-rt/lib/profile/InstrProfiling.h. 1113 // It should also match the synthesized type in 1114 // Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. 1115 template <class IntPtrT> struct alignas(8) ProfileData { 1116 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; 1117 #include "llvm/ProfileData/InstrProfData.inc" 1118 }; 1119 1120 // File header structure of the LLVM profile data in raw format. 1121 // The definition should match the header referenced in 1122 // compiler-rt/lib/profile/InstrProfilingFile.c and 1123 // InstrProfilingBuffer.c. 1124 struct Header { 1125 #define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name; 1126 #include "llvm/ProfileData/InstrProfData.inc" 1127 }; 1128 1129 } // end namespace RawInstrProf 1130 1131 // Parse MemOP Size range option. 1132 void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, 1133 int64_t &RangeLast); 1134 1135 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 1136 // aware this is an ir_level profile so it can set the version flag. 1137 void createIRLevelProfileFlagVar(Module &M, bool IsCS); 1138 1139 // Create the variable for the profile file name. 1140 void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); 1141 1142 } // end namespace llvm 1143 #endif // LLVM_PROFILEDATA_INSTRPROF_H 1144