1 //===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Instrumentation-based profiling data is generated by instrumented
10 // binaries through library functions in compiler-rt, and read by the clang
11 // frontend to feed PGO.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_PROFILEDATA_INSTRPROF_H
16 #define LLVM_PROFILEDATA_INSTRPROF_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/BitmaskEnum.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/ADT/Triple.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/ProfileSummary.h"
26 #include "llvm/ProfileData/InstrProfData.inc"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Compiler.h"
29 #include "llvm/Support/Endian.h"
30 #include "llvm/Support/Error.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/Host.h"
33 #include "llvm/Support/MD5.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cassert>
38 #include <cstddef>
39 #include <cstdint>
40 #include <cstring>
41 #include <list>
42 #include <memory>
43 #include <string>
44 #include <system_error>
45 #include <utility>
46 #include <vector>
47 
48 namespace llvm {
49 
50 class Function;
51 class GlobalVariable;
52 struct InstrProfRecord;
53 class InstrProfSymtab;
54 class Instruction;
55 class MDNode;
56 class Module;
57 
/// Kinds of profile sections. The enumerators are generated from the
/// INSTR_PROF_SECT_ENTRY entries in InstrProfData.inc: each entry contributes
/// its \c Kind argument as one enumerator, in the order the entries appear.
enum InstrProfSectKind {
#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/ProfileData/InstrProfData.inc"
};
62 
/// Return the name of the profile section corresponding to \p IPSK.
///
/// The name of the section depends on the object format type \p OF. If
/// \p AddSegmentInfo is true, a segment prefix and additional linker hints may
/// be added to the section name (this is the default).
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
                                    Triple::ObjectFormatType OF,
                                    bool AddSegmentInfo = true);

/// Return the name of the profile runtime entry point that does value
/// profiling for a given site.
inline StringRef getInstrProfValueProfFuncName() {
  return INSTR_PROF_VALUE_PROF_FUNC_STR;
}

/// Return the name of the profile runtime entry point that does memop size
/// value profiling.
inline StringRef getInstrProfValueProfMemOpFuncName() {
  return INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR;
}

/// Return the name prefix of variables containing instrumented function names.
inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }

/// Return the name prefix of variables containing per-function control data.
inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }

/// Return the name prefix of profile counter variables.
inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; }

/// Return the name prefix of value profile variables.
inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; }

/// Return the name of the value profile node array variable.
inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; }

/// Return the name of the variable holding the strings (possibly compressed)
/// of all functions' PGO names.
inline StringRef getInstrProfNamesVarName() {
  return "__llvm_prf_nm";
}

/// Return the name of a coverage mapping variable (internal linkage)
/// for each instrumented source module. Such variables are allocated
/// in the __llvm_covmap section.
inline StringRef getCoverageMappingVarName() {
  return "__llvm_coverage_mapping";
}

/// Return the name of the internal variable recording the array
/// of PGO name vars referenced by the coverage mapping. The owning
/// functions of those names are not emitted by the FE (e.g., unused inline
/// functions).
inline StringRef getCoverageUnusedNamesVarName() {
  return "__llvm_coverage_names";
}
119 
/// Return the name of the function that registers all the per-function control
/// data at program startup time by calling __llvm_profile_register_function
/// (see getInstrProfRegFuncName()). This function has internal linkage and is
/// called by the __llvm_profile_init runtime method. This function is not
/// generated for these platforms: Darwin, Linux, and FreeBSD.
inline StringRef getInstrProfRegFuncsName() {
  return "__llvm_profile_register_functions";
}

/// Return the name of the runtime interface that registers per-function control
/// data for one instrumented function.
inline StringRef getInstrProfRegFuncName() {
  return "__llvm_profile_register_function";
}

/// Return the name of the runtime interface that registers the PGO name strings.
inline StringRef getInstrProfNamesRegFuncName() {
  return "__llvm_profile_register_names_function";
}

/// Return the name of the runtime initialization method that is generated by
/// the compiler. The function calls __llvm_profile_register_functions and
/// __llvm_profile_override_default_filename functions if needed. This function
/// has internal linkage and is invoked at startup time via init_array.
inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; }

/// Return the name of the hook variable defined in profile runtime library.
/// A reference to the variable causes the linker to link in the runtime
/// initialization module (which defines the hook variable).
inline StringRef getInstrProfRuntimeHookVarName() {
  return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR);
}

/// Return the name of the compiler generated function that references the
/// runtime hook variable. The function is a weak global.
inline StringRef getInstrProfRuntimeHookVarUseFuncName() {
  return "__llvm_profile_runtime_user";
}

/// Return the name of the counter bias variable, i.e. the stringified
/// INSTR_PROF_PROFILE_COUNTER_BIAS_VAR macro.
inline StringRef getInstrProfCounterBiasVarName() {
  return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR);
}

/// Return the marker used to separate PGO names during serialization.
inline StringRef getInstrProfNameSeparator() { return "\01"; }
165 
/// Return the modified name for function \c F suitable to be
/// used as the key for profile lookup. Variable \c InLTO indicates if this
/// is called in LTO optimization passes. \c Version defaults to
/// INSTR_PROF_INDEX_VERSION.
std::string getPGOFuncName(const Function &F, bool InLTO = false,
                           uint64_t Version = INSTR_PROF_INDEX_VERSION);

/// Return the modified name for a function suitable to be
/// used as the key for profile lookup. The function's original
/// name is \c RawFuncName and has linkage of type \c Linkage.
/// The function is defined in module \c FileName.
std::string getPGOFuncName(StringRef RawFuncName,
                           GlobalValue::LinkageTypes Linkage,
                           StringRef FileName,
                           uint64_t Version = INSTR_PROF_INDEX_VERSION);

/// Return the name of the global variable used to store a function
/// name in PGO instrumentation. \c FuncName is the name of the function
/// returned by the \c getPGOFuncName call.
std::string getPGOFuncNameVarName(StringRef FuncName,
                                  GlobalValue::LinkageTypes Linkage);

/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function returned
/// by the \c getPGOFuncName call.
GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName);

/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function
/// returned by the \c getPGOFuncName call, \c M is the owning module,
/// and \c Linkage is the linkage of the instrumented function.
GlobalVariable *createPGOFuncNameVar(Module &M,
                                     GlobalValue::LinkageTypes Linkage,
                                     StringRef PGOFuncName);

/// Return the initializer in string of the PGO name var \c NameVar.
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);

/// Given a PGO function name, remove the filename prefix and return
/// the original (static) function name. \c FileName defaults to "<unknown>".
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
                                   StringRef FileName = "<unknown>");

/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string \c Result that is ready to be
/// serialized.  The \c Result string is comprised of three fields:
/// The first field is the length of the uncompressed strings, and
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128.  If \c doCompression is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
                                bool doCompression, std::string &Result);

/// Produce \c Result string with the same format described above. The input
/// is a vector of PGO function name variables that are referenced. Note that
/// the parameter order differs from the overload above: \c Result comes
/// before \c doCompression, which defaults to true here.
Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
                                std::string &Result, bool doCompression = true);

/// \c NameStrings is a string composed of one or more sub-strings encoded in
/// the format described above. The substrings are separated by 0 or more zero
/// bytes. This method decodes the string and populates the \c Symtab.
Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);

/// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being
/// set in IR PGO compilation.
bool isIRPGOFlagSet(const Module *M);

/// Check if we can safely rename this Comdat function. Instances of the same
/// comdat function may have different control flows and thus cannot share the
/// same counter variable.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false);
238 
/// Kinds of value profiling data. Enumerators and their numeric values are
/// generated from the VALUE_PROF_KIND entries in InstrProfData.inc.
enum InstrProfValueKind : uint32_t {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
#include "llvm/ProfileData/InstrProfData.inc"
};
243 
/// Get the value profile data for value site \p SiteIndx from \p InstrProfR
/// and annotate the instruction \p Inst with the value profile meta data.
/// Annotate up to \p MaxMDCount (default 3) number of records per value site.
void annotateValueSite(Module &M, Instruction &Inst,
                       const InstrProfRecord &InstrProfR,
                       InstrProfValueKind ValueKind, uint32_t SiteIndx,
                       uint32_t MaxMDCount = 3);

/// Same as the above interface but using an ArrayRef, as well as \p Sum.
/// Unlike the overload above, \p MaxMDCount has no default here.
void annotateValueSite(Module &M, Instruction &Inst,
                       ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
                       InstrProfValueKind ValueKind, uint32_t MaxMDCount);

/// Extract the value profile data from \p Inst which is annotated with
/// value profile meta data. Return false if there is no value data annotated,
/// otherwise return true.
bool getValueProfDataFromInst(const Instruction &Inst,
                              InstrProfValueKind ValueKind,
                              uint32_t MaxNumValueData,
                              InstrProfValueData ValueData[],
                              uint32_t &ActualNumValueData, uint64_t &TotalC,
                              bool GetNoICPValue = false);

/// Return the name of the PGOFuncName meta data.
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }

/// Return the PGOFuncName meta data associated with a function.
MDNode *getPGOFuncNameMetadata(const Function &F);

/// Create the PGOFuncName meta data if PGOFuncName is different from
/// function's raw name. This should only apply to internal linkage functions
/// declared by users only.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);

/// Check if we can use Comdat for profile variables. This will eliminate
/// the duplicated profile variables for Comdat functions.
bool needsComdatForCounter(const Function &F, const Module &M);
280 
/// An enum describing the attributes of an instrumented profile. Every
/// attribute other than Unknown occupies its own bit, and the enum is marked
/// as a bitmask enum whose largest value is MemProf.
enum class InstrProfKind {
  Unknown = 0x0,
  // A frontend clang profile, incompatible with other attrs.
  FrontendInstrumentation = 0x1,
  // An IR-level profile (default when -fprofile-generate is used).
  IRInstrumentation = 0x2,
  // A profile with entry basic block instrumentation.
  FunctionEntryInstrumentation = 0x4,
  // A context sensitive IR-level profile.
  ContextSensitive = 0x8,
  // Use single byte probes for coverage.
  SingleByteCoverage = 0x10,
  // Only instrument the function entry basic block.
  FunctionEntryOnly = 0x20,
  // A memory profile collected using -fprofile=memory.
  MemProf = 0x40,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/MemProf)
};
300 
/// Return the error category that instrprof_error codes belong to; it is the
/// category make_error_code(instrprof_error) attaches to its result.
const std::error_category &instrprof_category();

/// Error conditions reported while handling instrumentation profiles.
/// \c success is 0; the remaining enumerators take consecutive values.
enum class instrprof_error {
  success = 0,
  eof,
  unrecognized_format,
  bad_magic,
  bad_header,
  unsupported_version,
  unsupported_hash_type,
  too_large,
  truncated,
  malformed,
  missing_debug_info_for_correlation,
  unexpected_debug_info_for_correlation,
  unable_to_correlate_profile,
  unknown_function,
  invalid_prof,
  hash_mismatch,
  count_mismatch,
  counter_overflow,
  value_site_count_mismatch,
  compress_failed,
  uncompress_failed,
  empty_raw_profile,
  zlib_unavailable
};

/// Convert \p E into a std::error_code whose value is the enumerator's integer
/// value and whose category is instrprof_category().
inline std::error_code make_error_code(instrprof_error E) {
  return std::error_code(static_cast<int>(E), instrprof_category());
}
332 
333 class InstrProfError : public ErrorInfo<InstrProfError> {
334 public:
335   InstrProfError(instrprof_error Err, const Twine &ErrStr = Twine())
336       : Err(Err), Msg(ErrStr.str()) {
337     assert(Err != instrprof_error::success && "Not an error");
338   }
339 
340   std::string message() const override;
341 
342   void log(raw_ostream &OS) const override { OS << message(); }
343 
344   std::error_code convertToErrorCode() const override {
345     return make_error_code(Err);
346   }
347 
348   instrprof_error get() const { return Err; }
349   const std::string &getMessage() const { return Msg; }
350 
351   /// Consume an Error and return the raw enum value contained within it. The
352   /// Error must either be a success value, or contain a single InstrProfError.
353   static instrprof_error take(Error E) {
354     auto Err = instrprof_error::success;
355     handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) {
356       assert(Err == instrprof_error::success && "Multiple errors encountered");
357       Err = IPE.get();
358     });
359     return Err;
360   }
361 
362   static char ID;
363 
364 private:
365   instrprof_error Err;
366   std::string Msg;
367 };
368 
369 class SoftInstrProfErrors {
370   /// Count the number of soft instrprof_errors encountered and keep track of
371   /// the first such error for reporting purposes.
372 
373   /// The first soft error encountered.
374   instrprof_error FirstError = instrprof_error::success;
375 
376   /// The number of hash mismatches.
377   unsigned NumHashMismatches = 0;
378 
379   /// The number of count mismatches.
380   unsigned NumCountMismatches = 0;
381 
382   /// The number of counter overflows.
383   unsigned NumCounterOverflows = 0;
384 
385   /// The number of value site count mismatches.
386   unsigned NumValueSiteCountMismatches = 0;
387 
388 public:
389   SoftInstrProfErrors() = default;
390 
391   ~SoftInstrProfErrors() {
392     assert(FirstError == instrprof_error::success &&
393            "Unchecked soft error encountered");
394   }
395 
396   /// Track a soft error (\p IE) and increment its associated counter.
397   void addError(instrprof_error IE);
398 
399   /// Get the number of hash mismatches.
400   unsigned getNumHashMismatches() const { return NumHashMismatches; }
401 
402   /// Get the number of count mismatches.
403   unsigned getNumCountMismatches() const { return NumCountMismatches; }
404 
405   /// Get the number of counter overflows.
406   unsigned getNumCounterOverflows() const { return NumCounterOverflows; }
407 
408   /// Get the number of value site count mismatches.
409   unsigned getNumValueSiteCountMismatches() const {
410     return NumValueSiteCountMismatches;
411   }
412 
413   /// Return the first encountered error and reset FirstError to a success
414   /// value.
415   Error takeError() {
416     if (FirstError == instrprof_error::success)
417       return Error::success();
418     auto E = make_error<InstrProfError>(FirstError);
419     FirstError = instrprof_error::success;
420     return E;
421   }
422 };
423 
namespace object {

// Forward declaration only; this header uses it by reference in
// InstrProfSymtab::create(object::SectionRef &).
class SectionRef;

} // end namespace object

namespace IndexedInstrProf {

/// Compute the hash of key \p K. InstrProfSymtab::addFuncName uses the result
/// as the MD5 key under which a function name is stored.
uint64_t ComputeHash(StringRef K);

} // end namespace IndexedInstrProf
435 
436 /// A symbol table used for function PGO name look-up with keys
437 /// (such as pointers, md5hash values) to the function. A function's
438 /// PGO name or name's md5hash are used in retrieving the profile
439 /// data of the function. See \c getPGOFuncName() method for details
440 /// on how PGO name is formed.
441 class InstrProfSymtab {
442 public:
443   using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
444 
445 private:
446   StringRef Data;
447   uint64_t Address = 0;
448   // Unique name strings.
449   StringSet<> NameTab;
450   // A map from MD5 keys to function name strings.
451   std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
452   // A map from MD5 keys to function define. We only populate this map
453   // when build the Symtab from a Module.
454   std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
455   // A map from function runtime address to function name MD5 hash.
456   // This map is only populated and used by raw instr profile reader.
457   AddrHashMap AddrToMD5Map;
458   bool Sorted = false;
459 
460   static StringRef getExternalSymbol() {
461     return "** External Symbol **";
462   }
463 
464   // If the symtab is created by a series of calls to \c addFuncName, \c
465   // finalizeSymtab needs to be called before looking up function names.
466   // This is required because the underlying map is a vector (for space
467   // efficiency) which needs to be sorted.
468   inline void finalizeSymtab();
469 
470 public:
471   InstrProfSymtab() = default;
472 
473   /// Create InstrProfSymtab from an object file section which
474   /// contains function PGO names. When section may contain raw
475   /// string data or string data in compressed form. This method
476   /// only initialize the symtab with reference to the data and
477   /// the section base address. The decompression will be delayed
478   /// until before it is used. See also \c create(StringRef) method.
479   Error create(object::SectionRef &Section);
480 
481   /// This interface is used by reader of CoverageMapping test
482   /// format.
483   inline Error create(StringRef D, uint64_t BaseAddr);
484 
485   /// \c NameStrings is a string composed of one of more sub-strings
486   ///  encoded in the format described in \c collectPGOFuncNameStrings.
487   /// This method is a wrapper to \c readPGOFuncNameStrings method.
488   inline Error create(StringRef NameStrings);
489 
490   /// A wrapper interface to populate the PGO symtab with functions
491   /// decls from module \c M. This interface is used by transformation
492   /// passes such as indirect function call promotion. Variable \c InLTO
493   /// indicates if this is called from LTO optimization passes.
494   Error create(Module &M, bool InLTO = false);
495 
496   /// Create InstrProfSymtab from a set of names iteratable from
497   /// \p IterRange. This interface is used by IndexedProfReader.
498   template <typename NameIterRange> Error create(const NameIterRange &IterRange);
499 
500   /// Update the symtab by adding \p FuncName to the table. This interface
501   /// is used by the raw and text profile readers.
502   Error addFuncName(StringRef FuncName) {
503     if (FuncName.empty())
504       return make_error<InstrProfError>(instrprof_error::malformed,
505                                         "function name is empty");
506     auto Ins = NameTab.insert(FuncName);
507     if (Ins.second) {
508       MD5NameMap.push_back(std::make_pair(
509           IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey()));
510       Sorted = false;
511     }
512     return Error::success();
513   }
514 
515   /// Map a function address to its name's MD5 hash. This interface
516   /// is only used by the raw profiler reader.
517   void mapAddress(uint64_t Addr, uint64_t MD5Val) {
518     AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
519   }
520 
521   /// Return a function's hash, or 0, if the function isn't in this SymTab.
522   uint64_t getFunctionHashFromAddress(uint64_t Address);
523 
524   /// Return function's PGO name from the function name's symbol
525   /// address in the object file. If an error occurs, return
526   /// an empty string.
527   StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
528 
529   /// Return function's PGO name from the name's md5 hash value.
530   /// If not found, return an empty string.
531   inline StringRef getFuncName(uint64_t FuncMD5Hash);
532 
533   /// Just like getFuncName, except that it will return a non-empty StringRef
534   /// if the function is external to this symbol table. All such cases
535   /// will be represented using the same StringRef value.
536   inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash);
537 
538   /// True if Symbol is the value used to represent external symbols.
539   static bool isExternalSymbol(const StringRef &Symbol) {
540     return Symbol == InstrProfSymtab::getExternalSymbol();
541   }
542 
543   /// Return function from the name's md5 hash. Return nullptr if not found.
544   inline Function *getFunction(uint64_t FuncMD5Hash);
545 
546   /// Return the function's original assembly name by stripping off
547   /// the prefix attached (to symbols with priviate linkage). For
548   /// global functions, it returns the same string as getFuncName.
549   inline StringRef getOrigFuncName(uint64_t FuncMD5Hash);
550 
551   /// Return the name section data.
552   inline StringRef getNameData() const { return Data; }
553 
554   /// Dump the symbols in this table.
555   void dumpNames(raw_ostream &OS) const {
556     for (StringRef S : NameTab.keys())
557       OS << S << "\n";
558   }
559 };
560 
561 Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
562   Data = D;
563   Address = BaseAddr;
564   return Error::success();
565 }
566 
567 Error InstrProfSymtab::create(StringRef NameStrings) {
568   return readPGOFuncNameStrings(NameStrings, *this);
569 }
570 
571 template <typename NameIterRange>
572 Error InstrProfSymtab::create(const NameIterRange &IterRange) {
573   for (auto Name : IterRange)
574     if (Error E = addFuncName(Name))
575       return E;
576 
577   finalizeSymtab();
578   return Error::success();
579 }
580 
581 void InstrProfSymtab::finalizeSymtab() {
582   if (Sorted)
583     return;
584   llvm::sort(MD5NameMap, less_first());
585   llvm::sort(MD5FuncMap, less_first());
586   llvm::sort(AddrToMD5Map, less_first());
587   AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
588                      AddrToMD5Map.end());
589   Sorted = true;
590 }
591 
592 StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) {
593   StringRef ret = getFuncName(FuncMD5Hash);
594   if (ret.empty())
595     return InstrProfSymtab::getExternalSymbol();
596   return ret;
597 }
598 
599 StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
600   finalizeSymtab();
601   auto Result = llvm::lower_bound(MD5NameMap, FuncMD5Hash,
602                                   [](const std::pair<uint64_t, StringRef> &LHS,
603                                      uint64_t RHS) { return LHS.first < RHS; });
604   if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash)
605     return Result->second;
606   return StringRef();
607 }
608 
609 Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
610   finalizeSymtab();
611   auto Result = llvm::lower_bound(MD5FuncMap, FuncMD5Hash,
612                                   [](const std::pair<uint64_t, Function *> &LHS,
613                                      uint64_t RHS) { return LHS.first < RHS; });
614   if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash)
615     return Result->second;
616   return nullptr;
617 }
618 
619 // See also getPGOFuncName implementation. These two need to be
620 // matched.
621 StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
622   StringRef PGOName = getFuncName(FuncMD5Hash);
623   size_t S = PGOName.find_first_of(':');
624   if (S == StringRef::npos)
625     return PGOName;
626   return PGOName.drop_front(S + 1);
627 }
628 
629 // To store the sums of profile count values, or the percentage of
630 // the sums of the total count values.
631 struct CountSumOrPercent {
632   uint64_t NumEntries;
633   double CountSum;
634   double ValueCounts[IPVK_Last - IPVK_First + 1];
635   CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {}
636   void reset() {
637     NumEntries = 0;
638     CountSum = 0.0f;
639     for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++)
640       ValueCounts[I] = 0.0f;
641   }
642 };
643 
644 // Function level or program level overlap information.
645 struct OverlapStats {
646   enum OverlapStatsLevel { ProgramLevel, FunctionLevel };
647   // Sum of the total count values for the base profile.
648   CountSumOrPercent Base;
649   // Sum of the total count values for the test profile.
650   CountSumOrPercent Test;
651   // Overlap lap score. Should be in range of [0.0f to 1.0f].
652   CountSumOrPercent Overlap;
653   CountSumOrPercent Mismatch;
654   CountSumOrPercent Unique;
655   OverlapStatsLevel Level;
656   const std::string *BaseFilename;
657   const std::string *TestFilename;
658   StringRef FuncName;
659   uint64_t FuncHash;
660   bool Valid;
661 
662   OverlapStats(OverlapStatsLevel L = ProgramLevel)
663       : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0),
664         Valid(false) {}
665 
666   void dump(raw_fd_ostream &OS) const;
667 
668   void setFuncInfo(StringRef Name, uint64_t Hash) {
669     FuncName = Name;
670     FuncHash = Hash;
671   }
672 
673   Error accumulateCounts(const std::string &BaseFilename,
674                          const std::string &TestFilename, bool IsCS);
675   void addOneMismatch(const CountSumOrPercent &MismatchFunc);
676   void addOneUnique(const CountSumOrPercent &UniqueFunc);
677 
678   static inline double score(uint64_t Val1, uint64_t Val2, double Sum1,
679                              double Sum2) {
680     if (Sum1 < 1.0f || Sum2 < 1.0f)
681       return 0.0f;
682     return std::min(Val1 / Sum1, Val2 / Sum2);
683   }
684 };
685 
// This is used to filter the functions whose overlap information
// is to be output.
struct OverlapFuncFilters {
  // NOTE(review): presumably a count threshold a function must reach to be
  // reported — confirm against the code that consumes this struct.
  uint64_t ValueCutoff;
  // NOTE(review): presumably matched against function names — confirm against
  // the code that consumes this struct. Const, so it can only be set at
  // initialization.
  const std::string NameFilter;
};
692 
693 struct InstrProfValueSiteRecord {
694   /// Value profiling data pairs at a given value site.
695   std::list<InstrProfValueData> ValueData;
696 
697   InstrProfValueSiteRecord() { ValueData.clear(); }
698   template <class InputIterator>
699   InstrProfValueSiteRecord(InputIterator F, InputIterator L)
700       : ValueData(F, L) {}
701 
702   /// Sort ValueData ascending by Value
703   void sortByTargetValues() {
704     ValueData.sort(
705         [](const InstrProfValueData &left, const InstrProfValueData &right) {
706           return left.Value < right.Value;
707         });
708   }
709   /// Sort ValueData Descending by Count
710   inline void sortByCount();
711 
712   /// Merge data from another InstrProfValueSiteRecord
713   /// Optionally scale merged counts by \p Weight.
714   void merge(InstrProfValueSiteRecord &Input, uint64_t Weight,
715              function_ref<void(instrprof_error)> Warn);
716   /// Scale up value profile data counts by N (Numerator) / D (Denominator).
717   void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
718 
719   /// Compute the overlap b/w this record and Input record.
720   void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
721                OverlapStats &Overlap, OverlapStats &FuncLevelOverlap);
722 };
723 
724 /// Profiling information for a single function.
725 struct InstrProfRecord {
726   std::vector<uint64_t> Counts;
727 
728   InstrProfRecord() = default;
729   InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {}
730   InstrProfRecord(InstrProfRecord &&) = default;
731   InstrProfRecord(const InstrProfRecord &RHS)
732       : Counts(RHS.Counts),
733         ValueData(RHS.ValueData
734                       ? std::make_unique<ValueProfData>(*RHS.ValueData)
735                       : nullptr) {}
736   InstrProfRecord &operator=(InstrProfRecord &&) = default;
737   InstrProfRecord &operator=(const InstrProfRecord &RHS) {
738     Counts = RHS.Counts;
739     if (!RHS.ValueData) {
740       ValueData = nullptr;
741       return *this;
742     }
743     if (!ValueData)
744       ValueData = std::make_unique<ValueProfData>(*RHS.ValueData);
745     else
746       *ValueData = *RHS.ValueData;
747     return *this;
748   }
749 
  /// Return the number of value profile kinds with non-zero number
  /// of profile sites. All kinds from IPVK_First to IPVK_Last are examined.
  inline uint32_t getNumValueKinds() const;
  /// Return the number of instrumented sites for ValueKind.
  inline uint32_t getNumValueSites(uint32_t ValueKind) const;

  /// Return the total number of ValueData for ValueKind, summed over all of
  /// its sites.
  inline uint32_t getNumValueData(uint32_t ValueKind) const;

  /// Return the number of value data collected for ValueKind at profiling
  /// site: Site. \p Site is used directly as an index into the site list of
  /// \p ValueKind, so it must be less than getNumValueSites(ValueKind).
  inline uint32_t getNumValueDataForSite(uint32_t ValueKind,
                                         uint32_t Site) const;

  /// Return the array of profiled values at \p Site. If \p TotalC
  /// is not null, the total count of all target values at this site
  /// will be stored in \c *TotalC. When the site holds no value data, a null
  /// pointer is returned and the stored total is 0.
  inline std::unique_ptr<InstrProfValueData[]>
  getValueForSite(uint32_t ValueKind, uint32_t Site,
                  uint64_t *TotalC = nullptr) const;

  /// Get the target value/counts of kind \p ValueKind collected at site
  /// \p Site and store the result in array \p Dest. Return the total
  /// counts of all target values at this site (accumulated with a saturating
  /// add). \p Dest must have room for getNumValueDataForSite(ValueKind, Site)
  /// entries.
  inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind,
                                  uint32_t Site) const;

  /// Reserve space for NumValueSites sites. Does nothing when NumValueSites is
  /// zero; otherwise the value profile storage is created first if it does not
  /// exist yet.
  inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites);
779 
  /// Add ValueData for ValueKind at value Site.
  /// NOTE(review): defined out of line. \p VData presumably points to \p N
  /// entries and \p SymTab is presumably used to remap the recorded values;
  /// confirm against the definition.
  void addValueData(uint32_t ValueKind, uint32_t Site,
                    InstrProfValueData *VData, uint32_t N,
                    InstrProfSymtab *SymTab);

  /// Merge the counts in \p Other into this one.
  /// Optionally scale merged counts by \p Weight.
  /// NOTE(review): \p Warn is presumably invoked to report problems found
  /// while merging; confirm against the definition.
  void merge(InstrProfRecord &Other, uint64_t Weight,
             function_ref<void(instrprof_error)> Warn);

  /// Scale up profile counts (including value profile data) by
  /// a factor of (N / D).
  /// NOTE(review): \p Warn is presumably invoked to report problems found
  /// while scaling; confirm against the definition.
  void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
793 
794   /// Sort value profile data (per site) by count.
795   void sortValueData() {
796     for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
797       for (auto &SR : getValueSitesForKind(Kind))
798         SR.sortByCount();
799   }
800 
801   /// Clear value data entries and edge counters.
802   void Clear() {
803     Counts.clear();
804     clearValueData();
805   }
806 
807   /// Clear value data entries
808   void clearValueData() { ValueData = nullptr; }
809 
  /// Compute the sums of all counts and store in Sum.
  void accumulateCounts(CountSumOrPercent &Sum) const;

  /// Compute the overlap b/w this InstrProfRecord and Other.
  /// NOTE(review): defined out of line; the roles of \p Overlap,
  /// \p FuncLevelOverlap and \p ValueCutoff should be confirmed against the
  /// definition.
  void overlap(InstrProfRecord &Other, OverlapStats &Overlap,
               OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff);

  /// Compute the overlap of value profile counts.
  void overlapValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
                            OverlapStats &Overlap,
                            OverlapStats &FuncLevelOverlap);
821 
822 private:
  // Per-site value profile records, with one vector for each value kind
  // handled by getValueSitesForKind (IPVK_IndirectCallTarget and
  // IPVK_MemOPSize).
  struct ValueProfData {
    std::vector<InstrProfValueSiteRecord> IndirectCallSites;
    std::vector<InstrProfValueSiteRecord> MemOPSizes;
  };
  // Null until getOrCreateValueSitesForKind allocates it (or a copy brings it
  // in); reset to null again by clearValueData.
  std::unique_ptr<ValueProfData> ValueData;
828 
829   MutableArrayRef<InstrProfValueSiteRecord>
830   getValueSitesForKind(uint32_t ValueKind) {
831     // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever
832     // implemented in LLVM) to call the const overload of this function, then
833     // cast away the constness from the result.
834     auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind(
835         ValueKind);
836     return makeMutableArrayRef(
837         const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size());
838   }
839   ArrayRef<InstrProfValueSiteRecord>
840   getValueSitesForKind(uint32_t ValueKind) const {
841     if (!ValueData)
842       return None;
843     switch (ValueKind) {
844     case IPVK_IndirectCallTarget:
845       return ValueData->IndirectCallSites;
846     case IPVK_MemOPSize:
847       return ValueData->MemOPSizes;
848     default:
849       llvm_unreachable("Unknown value kind!");
850     }
851   }
852 
853   std::vector<InstrProfValueSiteRecord> &
854   getOrCreateValueSitesForKind(uint32_t ValueKind) {
855     if (!ValueData)
856       ValueData = std::make_unique<ValueProfData>();
857     switch (ValueKind) {
858     case IPVK_IndirectCallTarget:
859       return ValueData->IndirectCallSites;
860     case IPVK_MemOPSize:
861       return ValueData->MemOPSizes;
862     default:
863       llvm_unreachable("Unknown value kind!");
864     }
865   }
866 
  // Map indirect call target name hash to name string.
  // NOTE(review): defined out of line; the exact mapping applied for each
  // ValueKind should be confirmed against the definition.
  uint64_t remapValue(uint64_t Value, uint32_t ValueKind,
                      InstrProfSymtab *SymTab);

  // Merge Value Profile data from Src record to this record for ValueKind.
  // Scale merged value counts by \p Weight.
  // (The first parameter is spelled "ValkeKind" in this declaration; it is the
  // value kind.)
  void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src,
                          uint64_t Weight,
                          function_ref<void(instrprof_error)> Warn);

  // Scale up value profile data count by N (Numerator) / D (Denominator).
  void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D,
                          function_ref<void(instrprof_error)> Warn);
880 };
881 
882 struct NamedInstrProfRecord : InstrProfRecord {
883   StringRef Name;
884   uint64_t Hash;
885 
886   // We reserve this bit as the flag for context sensitive profile record.
887   static const int CS_FLAG_IN_FUNC_HASH = 60;
888 
889   NamedInstrProfRecord() = default;
890   NamedInstrProfRecord(StringRef Name, uint64_t Hash,
891                        std::vector<uint64_t> Counts)
892       : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
893 
894   static bool hasCSFlagInHash(uint64_t FuncHash) {
895     return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1);
896   }
897   static void setCSFlagInHash(uint64_t &FuncHash) {
898     FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH);
899   }
900 };
901 
902 uint32_t InstrProfRecord::getNumValueKinds() const {
903   uint32_t NumValueKinds = 0;
904   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
905     NumValueKinds += !(getValueSitesForKind(Kind).empty());
906   return NumValueKinds;
907 }
908 
909 uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const {
910   uint32_t N = 0;
911   for (auto &SR : getValueSitesForKind(ValueKind))
912     N += SR.ValueData.size();
913   return N;
914 }
915 
916 uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const {
917   return getValueSitesForKind(ValueKind).size();
918 }
919 
920 uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind,
921                                                  uint32_t Site) const {
922   return getValueSitesForKind(ValueKind)[Site].ValueData.size();
923 }
924 
925 std::unique_ptr<InstrProfValueData[]>
926 InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site,
927                                  uint64_t *TotalC) const {
928   uint64_t Dummy = 0;
929   uint64_t &TotalCount = (TotalC == nullptr ? Dummy : *TotalC);
930   uint32_t N = getNumValueDataForSite(ValueKind, Site);
931   if (N == 0) {
932     TotalCount = 0;
933     return std::unique_ptr<InstrProfValueData[]>(nullptr);
934   }
935 
936   auto VD = std::make_unique<InstrProfValueData[]>(N);
937   TotalCount = getValueForSite(VD.get(), ValueKind, Site);
938 
939   return VD;
940 }
941 
942 uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[],
943                                           uint32_t ValueKind,
944                                           uint32_t Site) const {
945   uint32_t I = 0;
946   uint64_t TotalCount = 0;
947   for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) {
948     Dest[I].Value = V.Value;
949     Dest[I].Count = V.Count;
950     TotalCount = SaturatingAdd(TotalCount, V.Count);
951     I++;
952   }
953   return TotalCount;
954 }
955 
956 void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) {
957   if (!NumValueSites)
958     return;
959   getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites);
960 }
961 
962 inline support::endianness getHostEndianness() {
963   return sys::IsLittleEndianHost ? support::little : support::big;
964 }
965 
966 // Include definitions for value profile data
967 #define INSTR_PROF_VALUE_PROF_DATA
968 #include "llvm/ProfileData/InstrProfData.inc"
969 
970 void InstrProfValueSiteRecord::sortByCount() {
971   ValueData.sort(
972       [](const InstrProfValueData &left, const InstrProfValueData &right) {
973         return left.Count > right.Count;
974       });
975   // Now truncate
976   size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
977   if (ValueData.size() > max_s)
978     ValueData.resize(max_s);
979 }
980 
981 namespace IndexedInstrProf {
982 
// Hash algorithm selector accepted by ComputeHash. MD5 is the only algorithm
// defined; Last aliases the highest-valued enumerator.
enum class HashT : uint32_t {
  MD5,
  Last = MD5
};
987 
988 inline uint64_t ComputeHash(HashT Type, StringRef K) {
989   switch (Type) {
990   case HashT::MD5:
991     return MD5Hash(K);
992   }
993   llvm_unreachable("Unhandled hash type");
994 }
995 
// Magic number of the indexed profile format. Its bytes, read from least to
// most significant, spell the string quoted on the right.
const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
997 
// Version history of the indexed profile format.
enum ProfVersion {
  // Version 1 is the first version. In this version, the value of
  // a key/value pair can only include profile data of a single function.
  // Due to this restriction, the number of block counters for a given
  // function is not recorded but derived from the length of the value.
  Version1 = 1,
  // The version 2 format supports recording profile data of multiple
  // functions which share the same key in one value field. To support this,
  // the number of block counters is recorded as a uint64_t field right after
  // the function structural hash.
  Version2 = 2,
  // Version 3 supports value profile data. The value profile data is expected
  // to follow the block counter profile data.
  Version3 = 3,
  // In this version, profile summary data \c IndexedInstrProf::Summary is
  // stored after the profile header.
  Version4 = 4,
  // In this version, the frontend PGO stable hash algorithm defaults to V2.
  Version5 = 5,
  // In this version, the frontend PGO stable hash algorithm got fixed and
  // may produce hashes different from Version5.
  Version6 = 6,
  // An additional counter is added around logical operators.
  Version7 = 7,
  // An additional (optional) memory profile type is added.
  Version8 = 8,
  // The current version is 8.
  CurrentVersion = INSTR_PROF_INDEX_VERSION
};
// Indexed format version, taken from ProfVersion::CurrentVersion.
const uint64_t Version = ProfVersion::CurrentVersion;

// Hash algorithm used by the single-argument ComputeHash overload.
const HashT HashType = HashT::MD5;
1030 
1031 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
1032 
// This structure defines the file header of the LLVM profile
// data file in indexed-format.
struct Header {
  uint64_t Magic;
  uint64_t Version;
  uint64_t Unused; // Unused since version 4
  uint64_t HashType;
  uint64_t HashOffset;
  uint64_t MemProfOffset;
  // New fields should only be added at the end to ensure that the size
  // computation is correct. The methods below need to be updated to ensure that
  // the new field is read correctly.

  // Reads a header struct from the buffer.
  static Expected<Header> readFromBuffer(const unsigned char *Buffer);

  // Returns the size of the header in bytes for all valid fields based on the
  // version. I.e., an older version header will return a smaller size.
  size_t size() const;

  // Returns the format version in little endian. The header retains the version
  // in native endian of the compiler runtime.
  uint64_t formatVersion() const;
};
1057 
1058 // Profile summary data recorded in the profile data file in indexed
1059 // format. It is introduced in version 4. The summary data follows
1060 // right after the profile file header.
1061 struct Summary {
1062   struct Entry {
1063     uint64_t Cutoff; ///< The required percentile of total execution count.
1064     uint64_t
1065         MinBlockCount;  ///< The minimum execution count for this percentile.
1066     uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count.
1067   };
1068   // The field kind enumerator to assigned value mapping should remain
1069   // unchanged  when a new kind is added or an old kind gets deleted in
1070   // the future.
1071   enum SummaryFieldKind {
1072     /// The total number of functions instrumented.
1073     TotalNumFunctions = 0,
1074     /// Total number of instrumented blocks/edges.
1075     TotalNumBlocks = 1,
1076     /// The maximal execution count among all functions.
1077     /// This field does not exist for profile data from IR based
1078     /// instrumentation.
1079     MaxFunctionCount = 2,
1080     /// Max block count of the program.
1081     MaxBlockCount = 3,
1082     /// Max internal block count of the program (excluding entry blocks).
1083     MaxInternalBlockCount = 4,
1084     /// The sum of all instrumented block counts.
1085     TotalBlockCount = 5,
1086     NumKinds = TotalBlockCount + 1
1087   };
1088 
1089   // The number of summmary fields following the summary header.
1090   uint64_t NumSummaryFields;
1091   // The number of Cutoff Entries (Summary::Entry) following summary fields.
1092   uint64_t NumCutoffEntries;
1093 
1094   Summary() = delete;
1095   Summary(uint32_t Size) { memset(this, 0, Size); }
1096 
1097   void operator delete(void *ptr) { ::operator delete(ptr); }
1098 
1099   static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) {
1100     return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) +
1101            NumSumFields * sizeof(uint64_t);
1102   }
1103 
1104   const uint64_t *getSummaryDataBase() const {
1105     return reinterpret_cast<const uint64_t *>(this + 1);
1106   }
1107 
1108   uint64_t *getSummaryDataBase() {
1109     return reinterpret_cast<uint64_t *>(this + 1);
1110   }
1111 
1112   const Entry *getCutoffEntryBase() const {
1113     return reinterpret_cast<const Entry *>(
1114         &getSummaryDataBase()[NumSummaryFields]);
1115   }
1116 
1117   Entry *getCutoffEntryBase() {
1118     return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]);
1119   }
1120 
1121   uint64_t get(SummaryFieldKind K) const {
1122     return getSummaryDataBase()[K];
1123   }
1124 
1125   void set(SummaryFieldKind K, uint64_t V) {
1126     getSummaryDataBase()[K] = V;
1127   }
1128 
1129   const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; }
1130 
1131   void setEntry(uint32_t I, const ProfileSummaryEntry &E) {
1132     Entry &ER = getCutoffEntryBase()[I];
1133     ER.Cutoff = E.Cutoff;
1134     ER.MinBlockCount = E.MinCount;
1135     ER.NumBlocks = E.NumCounts;
1136   }
1137 };
1138 
1139 inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) {
1140   return std::unique_ptr<Summary>(new (::operator new(TotalSize))
1141                                       Summary(TotalSize));
1142 }
1143 
1144 } // end namespace IndexedInstrProf
1145 
1146 namespace RawInstrProf {
1147 
// Version history of the raw profile format:
// Version 1: First version
// Version 2: Added value profile data section. Per-function control data
// struct has more fields to describe value profile information.
// Version 3: Compressed name section support. Function PGO name reference
// from control data struct is changed from raw pointer to Name's MD5 value.
// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
// raw header.
// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
// sensitive records.
// Version 6: Added binary id.
// Version 7: Reorder binary id and include version in signature.
// Version 8: Use relative counter pointer.
// The value in use comes from INSTR_PROF_RAW_VERSION.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
1161 
1162 template <class IntPtrT> inline uint64_t getMagic();
1163 template <> inline uint64_t getMagic<uint64_t>() {
1164   return INSTR_PROF_RAW_MAGIC_64;
1165 }
1166 
1167 template <> inline uint64_t getMagic<uint32_t>() {
1168   return INSTR_PROF_RAW_MAGIC_32;
1169 }
1170 
// Per-function profile data header/control structure.
// The definition should match the structure defined in
// compiler-rt/lib/profile/InstrProfiling.h.
// It should also match the synthesized type in
// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
//
// The members are generated: every INSTR_PROF_DATA(Type, LLVMType, Name, Init)
// entry in InstrProfData.inc expands to the member declaration `Type Name;`.
template <class IntPtrT> struct alignas(8) ProfileData {
  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
  #include "llvm/ProfileData/InstrProfData.inc"
};
1180 
// File header structure of the LLVM profile data in raw format.
// The definition should match the header referenced in
// compiler-rt/lib/profile/InstrProfilingFile.c  and
// InstrProfilingBuffer.c.
//
// The members are generated: every INSTR_PROF_RAW_HEADER(Type, Name, Init)
// entry in InstrProfData.inc expands to the member declaration
// `const Type Name;`.
struct Header {
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
1189 
1190 } // end namespace RawInstrProf
1191 
// Parse MemOP Size range option.
// NOTE(review): defined out of line; the parsed bounds are presumably
// returned through \p RangeStart and \p RangeLast. Confirm the accepted
// syntax of \p Str against the definition.
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
                                 int64_t &RangeLast);

// Create the variable for the profile file name.
// NOTE(review): presumably the variable is added to \p M and holds
// \p InstrProfileOutput; confirm against the definition.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);

// Whether to compress function names in profile records, and filenames in
// code coverage mappings. Used by the Instrumentation library and unit tests.
extern cl::opt<bool> DoInstrProfNameCompression;
1202 
1203 } // end namespace llvm
1204 #endif // LLVM_PROFILEDATA_INSTRPROF_H
1205