1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // llvm-profdata merges .profdata files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/IR/LLVMContext.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/ProfileData/InstrProfCorrelator.h"
19 #include "llvm/ProfileData/InstrProfReader.h"
20 #include "llvm/ProfileData/InstrProfWriter.h"
21 #include "llvm/ProfileData/MemProf.h"
22 #include "llvm/ProfileData/ProfileCommon.h"
23 #include "llvm/ProfileData/RawMemProfReader.h"
24 #include "llvm/ProfileData/SampleProfReader.h"
25 #include "llvm/ProfileData/SampleProfWriter.h"
26 #include "llvm/Support/BalancedPartitioning.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Discriminator.h"
29 #include "llvm/Support/Errc.h"
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/Support/Format.h"
32 #include "llvm/Support/FormattedStream.h"
33 #include "llvm/Support/InitLLVM.h"
34 #include "llvm/Support/LLVMDriver.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/ThreadPool.h"
39 #include "llvm/Support/Threading.h"
40 #include "llvm/Support/VirtualFileSystem.h"
41 #include "llvm/Support/WithColor.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <algorithm>
44 #include <cmath>
45 #include <optional>
46 #include <queue>
47 
48 using namespace llvm;
49 
// Sentinel stored as the mapped value in the static-function map to indicate
// that multiple static functions map to the same name.
const std::string DuplicateNameStr = "----";
53 
/// Profile serialization formats that can be requested for output.
///
/// Within this file the instrumentation-profile merge path accepts only
/// PF_Text, PF_Binary and PF_Ext_Binary: PF_Compact_Binary is rejected as
/// deprecated, and every other value is reported as an unknown format.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary
};
62 
/// Textual output flavours: plain text, JSON or YAML.
/// NOTE(review): presumably selected by the "show" command; it is not
/// referenced in this part of the file -- confirm against the users.
enum class ShowFormat { Text, Json, Yaml };
64 
65 static void warn(Twine Message, std::string Whence = "",
66                  std::string Hint = "") {
67   WithColor::warning();
68   if (!Whence.empty())
69     errs() << Whence << ": ";
70   errs() << Message << "\n";
71   if (!Hint.empty())
72     WithColor::note() << Hint << "\n";
73 }
74 
75 static void warn(Error E, StringRef Whence = "") {
76   if (E.isA<InstrProfError>()) {
77     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
78       warn(IPE.message(), std::string(Whence), std::string(""));
79     });
80   }
81 }
82 
83 static void exitWithError(Twine Message, std::string Whence = "",
84                           std::string Hint = "") {
85   WithColor::error();
86   if (!Whence.empty())
87     errs() << Whence << ": ";
88   errs() << Message << "\n";
89   if (!Hint.empty())
90     WithColor::note() << Hint << "\n";
91   ::exit(1);
92 }
93 
94 static void exitWithError(Error E, StringRef Whence = "") {
95   if (E.isA<InstrProfError>()) {
96     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
97       instrprof_error instrError = IPE.get();
98       StringRef Hint = "";
99       if (instrError == instrprof_error::unrecognized_format) {
100         // Hint in case user missed specifying the profile type.
101         Hint = "Perhaps you forgot to use the --sample or --memory option?";
102       }
103       exitWithError(IPE.message(), std::string(Whence), std::string(Hint));
104     });
105     return;
106   }
107 
108   exitWithError(toString(std::move(E)), std::string(Whence));
109 }
110 
111 static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
112   exitWithError(EC.message(), std::string(Whence));
113 }
114 
namespace {
/// The kinds of profile: instrumentation, sample and memory.
/// NOTE(review): not referenced in this part of the file -- presumably used
/// to select the profile kind on the command line; confirm.
enum ProfileKinds { instr, sample, memory };
/// Policy for invalid inputs. With failIfAnyAreInvalid a problem is reported
/// as an error and the tool exits; with failIfAllAreInvalid it is reported as
/// a warning (see warnOrExitGivenError).
enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid };
}
119 
120 static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
121                                  StringRef Whence = "") {
122   if (FailMode == failIfAnyAreInvalid)
123     exitWithErrorCode(EC, Whence);
124   else
125     warn(EC.message(), std::string(Whence));
126 }
127 
128 static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
129                                    StringRef WhenceFunction = "",
130                                    bool ShowHint = true) {
131   if (!WhenceFile.empty())
132     errs() << WhenceFile << ": ";
133   if (!WhenceFunction.empty())
134     errs() << WhenceFunction << ": ";
135 
136   auto IPE = instrprof_error::success;
137   E = handleErrors(std::move(E),
138                    [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
139                      IPE = E->get();
140                      return Error(std::move(E));
141                    });
142   errs() << toString(std::move(E)) << "\n";
143 
144   if (ShowHint) {
145     StringRef Hint = "";
146     if (IPE != instrprof_error::success) {
147       switch (IPE) {
148       case instrprof_error::hash_mismatch:
149       case instrprof_error::count_mismatch:
150       case instrprof_error::value_site_count_mismatch:
151         Hint = "Make sure that all profile data to be merged is generated "
152                "from the same binary.";
153         break;
154       default:
155         break;
156       }
157     }
158 
159     if (!Hint.empty())
160       errs() << Hint << "\n";
161   }
162 }
163 
164 namespace {
165 /// A remapper from original symbol names to new symbol names based on a file
166 /// containing a list of mappings from old name to new name.
167 class SymbolRemapper {
168   std::unique_ptr<MemoryBuffer> File;
169   DenseMap<StringRef, StringRef> RemappingTable;
170 
171 public:
172   /// Build a SymbolRemapper from a file containing a list of old/new symbols.
173   static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
174     auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
175     if (!BufOrError)
176       exitWithErrorCode(BufOrError.getError(), InputFile);
177 
178     auto Remapper = std::make_unique<SymbolRemapper>();
179     Remapper->File = std::move(BufOrError.get());
180 
181     for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
182          !LineIt.is_at_eof(); ++LineIt) {
183       std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
184       if (Parts.first.empty() || Parts.second.empty() ||
185           Parts.second.count(' ')) {
186         exitWithError("unexpected line in remapping file",
187                       (InputFile + ":" + Twine(LineIt.line_number())).str(),
188                       "expected 'old_symbol new_symbol'");
189       }
190       Remapper->RemappingTable.insert(Parts);
191     }
192     return Remapper;
193   }
194 
195   /// Attempt to map the given old symbol into a new symbol.
196   ///
197   /// \return The new symbol, or \p Name if no such symbol was found.
198   StringRef operator()(StringRef Name) {
199     StringRef New = RemappingTable.lookup(Name);
200     return New.empty() ? Name : New;
201   }
202 };
203 }
204 
205 struct WeightedFile {
206   std::string Filename;
207   uint64_t Weight;
208 };
209 typedef SmallVector<WeightedFile, 5> WeightedFileVector;
210 
/// Keep track of merged data and reported errors.
///
/// Several contexts can exist at once (one per worker when merging in
/// parallel); they share a single error lock and a single set of already
/// reported writer error codes.
struct WriterContext {
  /// Held by loadInput() for as long as it works on this context.
  std::mutex Lock;
  /// Accumulates the records merged into this context.
  InstrProfWriter Writer;
  /// Deferred errors, each paired with the name of the input that caused it.
  std::vector<std::pair<Error, std::string>> Errors;
  /// Shared lock taken before WriterErrorCodes is updated.
  std::mutex &ErrLock;
  /// Shared set of writer error codes that have already been reported.
  SmallSet<instrprof_error, 4> &WriterErrorCodes;

  /// \p IsSparse, \p ReservoirSize and \p MaxTraceLength are forwarded to the
  /// InstrProfWriter; \p ErrLock and \p WriterErrorCodes are stored by
  /// reference and must outlive this context.
  WriterContext(bool IsSparse, std::mutex &ErrLock,
                SmallSet<instrprof_error, 4> &WriterErrorCodes,
                uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
      : Writer(IsSparse, ReservoirSize, MaxTraceLength), ErrLock(ErrLock),
        WriterErrorCodes(WriterErrorCodes) {}
};
225 
226 /// Computer the overlap b/w profile BaseFilename and TestFileName,
227 /// and store the program level result to Overlap.
228 static void overlapInput(const std::string &BaseFilename,
229                          const std::string &TestFilename, WriterContext *WC,
230                          OverlapStats &Overlap,
231                          const OverlapFuncFilters &FuncFilter,
232                          raw_fd_ostream &OS, bool IsCS) {
233   auto FS = vfs::getRealFileSystem();
234   auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS);
235   if (Error E = ReaderOrErr.takeError()) {
236     // Skip the empty profiles by returning sliently.
237     auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
238     if (ErrorCode != instrprof_error::empty_raw_profile)
239       WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
240                               TestFilename);
241     return;
242   }
243 
244   auto Reader = std::move(ReaderOrErr.get());
245   for (auto &I : *Reader) {
246     OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
247     FuncOverlap.setFuncInfo(I.Name, I.Hash);
248 
249     WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
250     FuncOverlap.dump(OS);
251   }
252 }
253 
/// Load an input into a writer context.
///
/// \param Input          the profile file to read and the weight to apply
///                       to its records.
/// \param Remapper       optional symbol remapper applied to every record
///                       name; may be null.
/// \param Correlator     passed to InstrProfReader::create; may be null.
/// \param ProfiledBinary passed to RawMemProfReader::create when the input
///                       is a raw memprof profile.
/// \param WC             the context that receives records and deferred
///                       errors. Its lock is held for the whole call.
///
/// Problems with the input are appended to WC->Errors rather than reported
/// immediately, except for a failure to create the raw memprof reader (which
/// exits) and errors raised while adding records (which are printed at once).
static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
                      const InstrProfCorrelator *Correlator,
                      const StringRef ProfiledBinary, WriterContext *WC) {
  std::unique_lock<std::mutex> CtxGuard{WC->Lock};

  // Copy the filename, because llvm::ThreadPool copied the input "const
  // WeightedFile &" by value, making a reference to the filename within it
  // invalid outside of this packaged task.
  std::string Filename = Input.Filename;

  // Raw memprof profiles are handled by a dedicated reader.
  using ::llvm::memprof::RawMemProfReader;
  if (RawMemProfReader::hasFormat(Input.Filename)) {
    auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
    if (!ReaderOrErr) {
      exitWithError(ReaderOrErr.takeError(), Input.Filename);
    }
    std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
    // Check if the profile types can be merged, e.g. clang frontend profiles
    // should not be merged with memprof profiles.
    if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
      consumeError(std::move(E));
      WC->Errors.emplace_back(
          make_error<StringError>(
              "Cannot merge MemProf profile with Clang generated profile.",
              std::error_code()),
          Filename);
      return;
    }

    // Callback that records a frame-mapping failure as a deferred error.
    auto MemProfError = [&](Error E) {
      auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
      WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
                              Filename);
    };

    // Add the frame mappings into the writer context.
    const auto &IdToFrame = Reader->getFrameMapping();
    for (const auto &I : IdToFrame) {
      bool Succeeded = WC->Writer.addMemProfFrame(
          /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
      // If we weren't able to add the frame mappings then it doesn't make sense
      // to try to add the records from this profile.
      if (!Succeeded)
        return;
    }
    const auto &FunctionProfileData = Reader->getProfileData();
    // Add the memprof records into the writer context.
    for (const auto &I : FunctionProfileData) {
      WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second);
    }
    return;
  }

  // Everything else is read as an instrumentation profile.
  auto FS = vfs::getRealFileSystem();
  auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator);
  if (Error E = ReaderOrErr.takeError()) {
    // Skip the empty profiles by returning silently.
    auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
    if (ErrCode != instrprof_error::empty_raw_profile)
      WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
                              Filename);
    return;
  }

  auto Reader = std::move(ReaderOrErr.get());
  // Refuse to mix profile kinds that the writer reports as incompatible.
  if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
    consumeError(std::move(E));
    WC->Errors.emplace_back(
        make_error<StringError>(
            "Merge IR generated profile with Clang generated profile.",
            std::error_code()),
        Filename);
    return;
  }

  for (auto &I : *Reader) {
    if (Remapper)
      I.Name = (*Remapper)(I.Name);
    // Keep the (possibly remapped) name: the record itself is moved into the
    // writer below, but the error callback still needs to print the name.
    const StringRef FuncName = I.Name;
    // Report at most one error per record; later ones are just consumed.
    bool Reported = false;
    WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
      if (Reported) {
        consumeError(std::move(E));
        return;
      }
      Reported = true;
      // Only show hint the first time an error occurs.
      auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
      // The set of reported error codes is shared between contexts, so it is
      // updated under the shared error lock.
      std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
      bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
      handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
                             Input.Filename, FuncName, firstTime);
    });
  }

  // Carry over temporal profile traces, if the input has any.
  if (Reader->hasTemporalProfile()) {
    auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
    if (!Traces.empty())
      WC->Writer.addTemporalProfileTraces(
          Traces, Reader->getTemporalProfTraceStreamSize());
  }
  // Defer any error the reader ran into while iterating.
  if (Reader->hasError()) {
    if (Error E = Reader->getError())
      WC->Errors.emplace_back(std::move(E), Filename);
  }

  // Binary ids are added to the writer even if reading them reported an
  // error; the error itself is deferred.
  std::vector<llvm::object::BuildID> BinaryIds;
  if (Error E = Reader->readBinaryIds(BinaryIds))
    WC->Errors.emplace_back(std::move(E), Filename);
  WC->Writer.addBinaryIds(BinaryIds);
}
366 
367 /// Merge the \p Src writer context into \p Dst.
368 static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
369   for (auto &ErrorPair : Src->Errors)
370     Dst->Errors.push_back(std::move(ErrorPair));
371   Src->Errors.clear();
372 
373   if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind()))
374     exitWithError(std::move(E));
375 
376   Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
377     auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
378     std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
379     bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
380     if (firstTime)
381       warn(toString(make_error<InstrProfError>(ErrorCode, Msg)));
382   });
383 }
384 
385 static void writeInstrProfile(StringRef OutputFilename,
386                               ProfileFormat OutputFormat,
387                               InstrProfWriter &Writer) {
388   std::error_code EC;
389   raw_fd_ostream Output(OutputFilename.data(), EC,
390                         OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
391                                                 : sys::fs::OF_None);
392   if (EC)
393     exitWithErrorCode(EC, OutputFilename);
394 
395   if (OutputFormat == PF_Text) {
396     if (Error E = Writer.writeText(Output))
397       warn(std::move(E));
398   } else {
399     if (Output.is_displayed())
400       exitWithError("cannot write a non-text format profile to the terminal");
401     if (Error E = Writer.write(Output))
402       warn(std::move(E));
403   }
404 }
405 
406 static void
407 mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename,
408                   SymbolRemapper *Remapper, StringRef OutputFilename,
409                   ProfileFormat OutputFormat, uint64_t TraceReservoirSize,
410                   uint64_t MaxTraceLength, bool OutputSparse,
411                   unsigned NumThreads, FailureMode FailMode,
412                   const StringRef ProfiledBinary) {
413   if (OutputFormat == PF_Compact_Binary)
414     exitWithError("Compact Binary is deprecated");
415   if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
416       OutputFormat != PF_Text)
417     exitWithError("unknown format is specified");
418 
419   std::unique_ptr<InstrProfCorrelator> Correlator;
420   if (!DebugInfoFilename.empty()) {
421     if (auto Err =
422             InstrProfCorrelator::get(DebugInfoFilename).moveInto(Correlator))
423       exitWithError(std::move(Err), DebugInfoFilename);
424     if (auto Err = Correlator->correlateProfileData())
425       exitWithError(std::move(Err), DebugInfoFilename);
426   }
427 
428   std::mutex ErrorLock;
429   SmallSet<instrprof_error, 4> WriterErrorCodes;
430 
431   // If NumThreads is not specified, auto-detect a good default.
432   if (NumThreads == 0)
433     NumThreads = std::min(hardware_concurrency().compute_thread_count(),
434                           unsigned((Inputs.size() + 1) / 2));
435 
436   // Initialize the writer contexts.
437   SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
438   for (unsigned I = 0; I < NumThreads; ++I)
439     Contexts.emplace_back(std::make_unique<WriterContext>(
440         OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize,
441         MaxTraceLength));
442 
443   if (NumThreads == 1) {
444     for (const auto &Input : Inputs)
445       loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
446                 Contexts[0].get());
447   } else {
448     ThreadPool Pool(hardware_concurrency(NumThreads));
449 
450     // Load the inputs in parallel (N/NumThreads serial steps).
451     unsigned Ctx = 0;
452     for (const auto &Input : Inputs) {
453       Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
454                  Contexts[Ctx].get());
455       Ctx = (Ctx + 1) % NumThreads;
456     }
457     Pool.wait();
458 
459     // Merge the writer contexts together (~ lg(NumThreads) serial steps).
460     unsigned Mid = Contexts.size() / 2;
461     unsigned End = Contexts.size();
462     assert(Mid > 0 && "Expected more than one context");
463     do {
464       for (unsigned I = 0; I < Mid; ++I)
465         Pool.async(mergeWriterContexts, Contexts[I].get(),
466                    Contexts[I + Mid].get());
467       Pool.wait();
468       if (End & 1) {
469         Pool.async(mergeWriterContexts, Contexts[0].get(),
470                    Contexts[End - 1].get());
471         Pool.wait();
472       }
473       End = Mid;
474       Mid /= 2;
475     } while (Mid > 0);
476   }
477 
478   // Handle deferred errors encountered during merging. If the number of errors
479   // is equal to the number of inputs the merge failed.
480   unsigned NumErrors = 0;
481   for (std::unique_ptr<WriterContext> &WC : Contexts) {
482     for (auto &ErrorPair : WC->Errors) {
483       ++NumErrors;
484       warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
485     }
486   }
487   if (NumErrors == Inputs.size() ||
488       (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
489     exitWithError("no profile can be merged");
490 
491   writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
492 }
493 
494 /// The profile entry for a function in instrumentation profile.
495 struct InstrProfileEntry {
496   uint64_t MaxCount = 0;
497   uint64_t NumEdgeCounters = 0;
498   float ZeroCounterRatio = 0.0;
499   InstrProfRecord *ProfRecord;
500   InstrProfileEntry(InstrProfRecord *Record);
501   InstrProfileEntry() = default;
502 };
503 
504 InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
505   ProfRecord = Record;
506   uint64_t CntNum = Record->Counts.size();
507   uint64_t ZeroCntNum = 0;
508   for (size_t I = 0; I < CntNum; ++I) {
509     MaxCount = std::max(MaxCount, Record->Counts[I]);
510     ZeroCntNum += !Record->Counts[I];
511   }
512   ZeroCounterRatio = (float)ZeroCntNum / CntNum;
513   NumEdgeCounters = CntNum;
514 }
515 
516 /// Either set all the counters in the instr profile entry \p IFE to
517 /// -1 / -2 /in order to drop the profile or scale up the
518 /// counters in \p IFP to be above hot / cold threshold. We use
519 /// the ratio of zero counters in the profile of a function to
520 /// decide the profile is helpful or harmful for performance,
521 /// and to choose whether to scale up or drop it.
522 static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
523                                     uint64_t HotInstrThreshold,
524                                     uint64_t ColdInstrThreshold,
525                                     float ZeroCounterThreshold) {
526   InstrProfRecord *ProfRecord = IFE.ProfRecord;
527   if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
528     // If all or most of the counters of the function are zero, the
529     // profile is unaccountable and should be dropped. Reset all the
530     // counters to be -1 / -2 and PGO profile-use will drop the profile.
531     // All counters being -1 also implies that the function is hot so
532     // PGO profile-use will also set the entry count metadata to be
533     // above hot threshold.
534     // All counters being -2 implies that the function is warm so
535     // PGO profile-use will also set the entry count metadata to be
536     // above cold threshold.
537     auto Kind =
538         (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
539     ProfRecord->setPseudoCount(Kind);
540     return;
541   }
542 
543   // Scale up the MaxCount to be multiple times above hot / cold threshold.
544   const unsigned MultiplyFactor = 3;
545   uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
546   uint64_t Numerator = Threshold * MultiplyFactor;
547 
548   // Make sure Threshold for warm counters is below the HotInstrThreshold.
549   if (!SetToHot && Threshold >= HotInstrThreshold) {
550     Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
551   }
552 
553   uint64_t Denominator = IFE.MaxCount;
554   if (Numerator <= Denominator)
555     return;
556   ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
557     warn(toString(make_error<InstrProfError>(E)));
558   });
559 }
560 
// Indices into ProfileSummaryBuilder::DefaultCutoffs that select the cutoff
// used to derive the cold and the hot count thresholds respectively.
const uint64_t ColdPercentileIdx = 15;
const uint64_t HotPercentileIdx = 11;
563 
564 using sampleprof::FSDiscriminatorPass;
565 
566 // Internal options to set FSDiscriminatorPass. Used in merge and show
567 // commands.
568 static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
569     "fs-discriminator-pass", cl::init(PassLast), cl::Hidden,
570     cl::desc("Zero out the discriminator bits for the FS discrimiantor "
571              "pass beyond this value. The enum values are defined in "
572              "Support/Discriminator.h"),
573     cl::values(clEnumVal(Base, "Use base discriminators only"),
574                clEnumVal(Pass1, "Use base and pass 1 discriminators"),
575                clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
576                clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
577                clEnumVal(PassLast, "Use all discriminator bits (default)")));
578 
579 static unsigned getDiscriminatorMask() {
580   return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue()));
581 }
582 
583 /// Adjust the instr profile in \p WC based on the sample profile in
584 /// \p Reader.
585 static void
586 adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
587                    std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
588                    unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
589                    unsigned InstrProfColdThreshold) {
590   // Function to its entry in instr profile.
591   StringMap<InstrProfileEntry> InstrProfileMap;
592   StringMap<StringRef> StaticFuncMap;
593   InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
594 
595   auto checkSampleProfileHasFUnique = [&Reader]() {
596     for (const auto &PD : Reader->getProfiles()) {
597       auto &FContext = PD.first;
598       if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
599           std::string::npos) {
600         return true;
601       }
602     }
603     return false;
604   };
605 
606   bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
607 
608   auto buildStaticFuncMap = [&StaticFuncMap,
609                              SampleProfileHasFUnique](const StringRef Name) {
610     std::string Prefixes[] = {".cpp:", "cc:", ".c:", ".hpp:", ".h:"};
611     size_t PrefixPos = StringRef::npos;
612     for (auto &Prefix : Prefixes) {
613       PrefixPos = Name.find_insensitive(Prefix);
614       if (PrefixPos == StringRef::npos)
615         continue;
616       PrefixPos += Prefix.size();
617       break;
618     }
619 
620     if (PrefixPos == StringRef::npos) {
621       return;
622     }
623 
624     StringRef NewName = Name.drop_front(PrefixPos);
625     StringRef FName = Name.substr(0, PrefixPos - 1);
626     if (NewName.size() == 0) {
627       return;
628     }
629 
630     // This name should have a static linkage.
631     size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix);
632     bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
633 
634     // If sample profile and instrumented profile do not agree on symbol
635     // uniqification.
636     if (SampleProfileHasFUnique != ProfileHasFUnique) {
637       // If instrumented profile uses -funique-internal-linakge-symbols,
638       // we need to trim the name.
639       if (ProfileHasFUnique) {
640         NewName = NewName.substr(0, PostfixPos);
641       } else {
642         // If sample profile uses -funique-internal-linakge-symbols,
643         // we build the map.
644         std::string NStr =
645             NewName.str() + getUniqueInternalLinkagePostfix(FName);
646         NewName = StringRef(NStr);
647         StaticFuncMap[NewName] = Name;
648         return;
649       }
650     }
651 
652     if (!StaticFuncMap.contains(NewName)) {
653       StaticFuncMap[NewName] = Name;
654     } else {
655       StaticFuncMap[NewName] = DuplicateNameStr;
656     }
657   };
658 
659   // We need to flatten the SampleFDO profile as the InstrFDO
660   // profile does not have inlined callsite profiles.
661   // One caveat is the pre-inlined function -- their samples
662   // should be collapsed into the caller function.
663   // Here we do a DFS traversal to get the flatten profile
664   // info: the sum of entrycount and the max of maxcount.
665   // Here is the algorithm:
666   //   recursive (FS, root_name) {
667   //      name = FS->getName();
668   //      get samples for FS;
669   //      if (InstrProf.find(name) {
670   //        root_name = name;
671   //      } else {
672   //        if (name is in static_func map) {
673   //          root_name = static_name;
674   //        }
675   //      }
676   //      update the Map entry for root_name;
677   //      for (subfs: FS) {
678   //        recursive(subfs, root_name);
679   //      }
680   //   }
681   //
682   // Here is an example.
683   //
684   // SampleProfile:
685   // foo:12345:1000
686   // 1: 1000
687   // 2.1: 1000
688   // 15: 5000
689   // 4: bar:1000
690   //  1: 1000
691   //  2: goo:3000
692   //   1: 3000
693   // 8: bar:40000
694   //  1: 10000
695   //  2: goo:30000
696   //   1: 30000
697   //
698   // InstrProfile has two entries:
699   //  foo
700   //  bar.cc:bar
701   //
702   // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
703   // {"foo", {1000, 5000}}
704   // {"bar.cc:bar", {11000, 30000}}
705   //
706   // foo's has an entry count of 1000, and max body count of 5000.
707   // bar.cc:bar has an entry count of 11000 (sum two callsites of 1000 and
708   // 10000), and max count of 30000 (from the callsite in line 8).
709   //
710   // Note that goo's count will remain in bar.cc:bar() as it does not have an
711   // entry in InstrProfile.
712   DenseMap<StringRef, std::pair<uint64_t, uint64_t>> FlattenSampleMap;
713   auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
714                             &InstrProfileMap](const FunctionSamples &FS,
715                                               const StringRef &RootName) {
716     auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
717                                      const StringRef &RootName,
718                                      auto &BuildImpl) -> void {
719       const StringRef &Name = FS.getName();
720       const StringRef *NewRootName = &RootName;
721       uint64_t EntrySample = FS.getHeadSamplesEstimate();
722       uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
723 
724       auto It = InstrProfileMap.find(Name);
725       if (It != InstrProfileMap.end()) {
726         NewRootName = &Name;
727       } else {
728         auto NewName = StaticFuncMap.find(Name);
729         if (NewName != StaticFuncMap.end()) {
730           It = InstrProfileMap.find(NewName->second.str());
731           if (NewName->second != DuplicateNameStr) {
732             NewRootName = &NewName->second;
733           }
734         } else {
735           // Here the EntrySample is of an inlined function, so we should not
736           // update the EntrySample in the map.
737           EntrySample = 0;
738         }
739       }
740       EntrySample += FlattenSampleMap[*NewRootName].first;
741       MaxBodySample =
742           std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample);
743       FlattenSampleMap[*NewRootName] =
744           std::make_pair(EntrySample, MaxBodySample);
745 
746       for (const auto &C : FS.getCallsiteSamples())
747         for (const auto &F : C.second)
748           BuildImpl(F.second, *NewRootName, BuildImpl);
749     };
750     BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
751   };
752 
753   for (auto &PD : WC->Writer.getProfileData()) {
754     // Populate IPBuilder.
755     for (const auto &PDV : PD.getValue()) {
756       InstrProfRecord Record = PDV.second;
757       IPBuilder.addRecord(Record);
758     }
759 
760     // If a function has multiple entries in instr profile, skip it.
761     if (PD.getValue().size() != 1)
762       continue;
763 
764     // Initialize InstrProfileMap.
765     InstrProfRecord *R = &PD.getValue().begin()->second;
766     StringRef FullName = PD.getKey();
767     InstrProfileMap[FullName] = InstrProfileEntry(R);
768     buildStaticFuncMap(FullName);
769   }
770 
771   for (auto &PD : Reader->getProfiles()) {
772     sampleprof::FunctionSamples &FS = PD.second;
773     BuildMaxSampleMap(FS, FS.getName());
774   }
775 
776   ProfileSummary InstrPS = *IPBuilder.getSummary();
777   ProfileSummary SamplePS = Reader->getSummary();
778 
779   // Compute cold thresholds for instr profile and sample profile.
780   uint64_t HotSampleThreshold =
781       ProfileSummaryBuilder::getEntryForPercentile(
782           SamplePS.getDetailedSummary(),
783           ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
784           .MinCount;
785   uint64_t ColdSampleThreshold =
786       ProfileSummaryBuilder::getEntryForPercentile(
787           SamplePS.getDetailedSummary(),
788           ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
789           .MinCount;
790   uint64_t HotInstrThreshold =
791       ProfileSummaryBuilder::getEntryForPercentile(
792           InstrPS.getDetailedSummary(),
793           ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
794           .MinCount;
795   uint64_t ColdInstrThreshold =
796       InstrProfColdThreshold
797           ? InstrProfColdThreshold
798           : ProfileSummaryBuilder::getEntryForPercentile(
799                 InstrPS.getDetailedSummary(),
800                 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
801                 .MinCount;
802 
803   // Find hot/warm functions in sample profile which is cold in instr profile
804   // and adjust the profiles of those functions in the instr profile.
805   for (const auto &E : FlattenSampleMap) {
806     uint64_t SampleMaxCount = std::max(E.second.first, E.second.second);
807     if (SampleMaxCount < ColdSampleThreshold)
808       continue;
809     const StringRef &Name = E.first;
810     auto It = InstrProfileMap.find(Name);
811     if (It == InstrProfileMap.end()) {
812       auto NewName = StaticFuncMap.find(Name);
813       if (NewName != StaticFuncMap.end()) {
814         It = InstrProfileMap.find(NewName->second.str());
815         if (NewName->second == DuplicateNameStr) {
816           WithColor::warning()
817               << "Static function " << Name
818               << " has multiple promoted names, cannot adjust profile.\n";
819         }
820       }
821     }
822     if (It == InstrProfileMap.end() ||
823         It->second.MaxCount > ColdInstrThreshold ||
824         It->second.NumEdgeCounters < SupplMinSizeThreshold)
825       continue;
826     bool SetToHot = SampleMaxCount >= HotSampleThreshold;
827     updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold,
828                             ColdInstrThreshold, ZeroCounterThreshold);
829   }
830 }
831 
832 /// The main function to supplement instr profile with sample profile.
833 /// \Inputs contains the instr profile. \p SampleFilename specifies the
834 /// sample profile. \p OutputFilename specifies the output profile name.
835 /// \p OutputFormat specifies the output profile format. \p OutputSparse
836 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
837 /// specifies the minimal size for the functions whose profile will be
838 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether
839 /// a function contains too many zero counters and whether its profile
840 /// should be dropped. \p InstrProfColdThreshold is the user specified
841 /// cold threshold which will override the cold threshold got from the
842 /// instr profile summary.
843 static void supplementInstrProfile(
844     const WeightedFileVector &Inputs, StringRef SampleFilename,
845     StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse,
846     unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
847     unsigned InstrProfColdThreshold) {
848   if (OutputFilename.compare("-") == 0)
849     exitWithError("cannot write indexed profdata format to stdout");
850   if (Inputs.size() != 1)
851     exitWithError("expect one input to be an instr profile");
852   if (Inputs[0].Weight != 1)
853     exitWithError("expect instr profile doesn't have weight");
854 
855   StringRef InstrFilename = Inputs[0].Filename;
856 
857   // Read sample profile.
858   LLVMContext Context;
859   auto FS = vfs::getRealFileSystem();
860   auto ReaderOrErr = sampleprof::SampleProfileReader::create(
861       SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption);
862   if (std::error_code EC = ReaderOrErr.getError())
863     exitWithErrorCode(EC, SampleFilename);
864   auto Reader = std::move(ReaderOrErr.get());
865   if (std::error_code EC = Reader->read())
866     exitWithErrorCode(EC, SampleFilename);
867 
868   // Read instr profile.
869   std::mutex ErrorLock;
870   SmallSet<instrprof_error, 4> WriterErrorCodes;
871   auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
872                                             WriterErrorCodes);
873   loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
874   if (WC->Errors.size() > 0)
875     exitWithError(std::move(WC->Errors[0].first), InstrFilename);
876 
877   adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
878                      InstrProfColdThreshold);
879   writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
880 }
881 
882 /// Make a copy of the given function samples with all symbol names remapped
883 /// by the provided symbol remapper.
884 static sampleprof::FunctionSamples
885 remapSamples(const sampleprof::FunctionSamples &Samples,
886              SymbolRemapper &Remapper, sampleprof_error &Error) {
887   sampleprof::FunctionSamples Result;
888   Result.setName(Remapper(Samples.getName()));
889   Result.addTotalSamples(Samples.getTotalSamples());
890   Result.addHeadSamples(Samples.getHeadSamples());
891   for (const auto &BodySample : Samples.getBodySamples()) {
892     uint32_t MaskedDiscriminator =
893         BodySample.first.Discriminator & getDiscriminatorMask();
894     Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator,
895                           BodySample.second.getSamples());
896     for (const auto &Target : BodySample.second.getCallTargets()) {
897       Result.addCalledTargetSamples(BodySample.first.LineOffset,
898                                     MaskedDiscriminator,
899                                     Remapper(Target.first()), Target.second);
900     }
901   }
902   for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
903     sampleprof::FunctionSamplesMap &Target =
904         Result.functionSamplesAt(CallsiteSamples.first);
905     for (const auto &Callsite : CallsiteSamples.second) {
906       sampleprof::FunctionSamples Remapped =
907           remapSamples(Callsite.second, Remapper, Error);
908       MergeResult(Error,
909                   Target[std::string(Remapped.getName())].merge(Remapped));
910     }
911   }
912   return Result;
913 }
914 
// Lookup table translating this tool's output-format selector into the sample
// profile writer's format enum; mergeSampleProfile indexes it with a
// ProfileFormat value.
// NOTE(review): the entry order presumably mirrors the declaration order of
// the ProfileFormat enumerators, which are defined outside this chunk —
// confirm before adding, removing or reordering entries.
static sampleprof::SampleProfileFormat FormatMap[] = {
    sampleprof::SPF_None,
    sampleprof::SPF_Text,
    sampleprof::SPF_None,
    sampleprof::SPF_Ext_Binary,
    sampleprof::SPF_GCC,
    sampleprof::SPF_Binary};
922 
923 static std::unique_ptr<MemoryBuffer>
924 getInputFileBuf(const StringRef &InputFile) {
925   if (InputFile == "")
926     return {};
927 
928   auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
929   if (!BufOrError)
930     exitWithErrorCode(BufOrError.getError(), InputFile);
931 
932   return std::move(*BufOrError);
933 }
934 
935 static void populateProfileSymbolList(MemoryBuffer *Buffer,
936                                       sampleprof::ProfileSymbolList &PSL) {
937   if (!Buffer)
938     return;
939 
940   SmallVector<StringRef, 32> SymbolVec;
941   StringRef Data = Buffer->getBuffer();
942   Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
943 
944   for (StringRef SymbolStr : SymbolVec)
945     PSL.add(SymbolStr.trim());
946 }
947 
948 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
949                                   ProfileFormat OutputFormat,
950                                   MemoryBuffer *Buffer,
951                                   sampleprof::ProfileSymbolList &WriterList,
952                                   bool CompressAllSections, bool UseMD5,
953                                   bool GenPartialProfile) {
954   populateProfileSymbolList(Buffer, WriterList);
955   if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
956     warn("Profile Symbol list is not empty but the output format is not "
957          "ExtBinary format. The list will be lost in the output. ");
958 
959   Writer.setProfileSymbolList(&WriterList);
960 
961   if (CompressAllSections) {
962     if (OutputFormat != PF_Ext_Binary)
963       warn("-compress-all-section is ignored. Specify -extbinary to enable it");
964     else
965       Writer.setToCompressAllSections();
966   }
967   if (UseMD5) {
968     if (OutputFormat != PF_Ext_Binary)
969       warn("-use-md5 is ignored. Specify -extbinary to enable it");
970     else
971       Writer.setUseMD5();
972   }
973   if (GenPartialProfile) {
974     if (OutputFormat != PF_Ext_Binary)
975       warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
976     else
977       Writer.setPartialProfile();
978   }
979 }
980 
981 static void
982 mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
983                    StringRef OutputFilename, ProfileFormat OutputFormat,
984                    StringRef ProfileSymbolListFile, bool CompressAllSections,
985                    bool UseMD5, bool GenPartialProfile,
986                    SampleProfileLayout ProfileLayout,
987                    bool SampleMergeColdContext, bool SampleTrimColdContext,
988                    bool SampleColdContextFrameDepth, FailureMode FailMode,
989                    bool DropProfileSymbolList, size_t OutputSizeLimit) {
990   using namespace sampleprof;
991   SampleProfileMap ProfileMap;
992   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
993   LLVMContext Context;
994   sampleprof::ProfileSymbolList WriterList;
995   std::optional<bool> ProfileIsProbeBased;
996   std::optional<bool> ProfileIsCS;
997   for (const auto &Input : Inputs) {
998     auto FS = vfs::getRealFileSystem();
999     auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS,
1000                                                    FSDiscriminatorPassOption);
1001     if (std::error_code EC = ReaderOrErr.getError()) {
1002       warnOrExitGivenError(FailMode, EC, Input.Filename);
1003       continue;
1004     }
1005 
1006     // We need to keep the readers around until after all the files are
1007     // read so that we do not lose the function names stored in each
1008     // reader's memory. The function names are needed to write out the
1009     // merged profile map.
1010     Readers.push_back(std::move(ReaderOrErr.get()));
1011     const auto Reader = Readers.back().get();
1012     if (std::error_code EC = Reader->read()) {
1013       warnOrExitGivenError(FailMode, EC, Input.Filename);
1014       Readers.pop_back();
1015       continue;
1016     }
1017 
1018     SampleProfileMap &Profiles = Reader->getProfiles();
1019     if (ProfileIsProbeBased &&
1020         ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1021       exitWithError(
1022           "cannot merge probe-based profile with non-probe-based profile");
1023     ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1024     if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1025       exitWithError("cannot merge CS profile with non-CS profile");
1026     ProfileIsCS = FunctionSamples::ProfileIsCS;
1027     for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1028          I != E; ++I) {
1029       sampleprof_error Result = sampleprof_error::success;
1030       FunctionSamples Remapped =
1031           Remapper ? remapSamples(I->second, *Remapper, Result)
1032                    : FunctionSamples();
1033       FunctionSamples &Samples = Remapper ? Remapped : I->second;
1034       SampleContext FContext = Samples.getContext();
1035       MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight));
1036       if (Result != sampleprof_error::success) {
1037         std::error_code EC = make_error_code(Result);
1038         handleMergeWriterError(errorCodeToError(EC), Input.Filename,
1039                                FContext.toString());
1040       }
1041     }
1042 
1043     if (!DropProfileSymbolList) {
1044       std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1045           Reader->getProfileSymbolList();
1046       if (ReaderList)
1047         WriterList.merge(*ReaderList);
1048     }
1049   }
1050 
1051   if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1052     // Use threshold calculated from profile summary unless specified.
1053     SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1054     auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
1055     uint64_t SampleProfColdThreshold =
1056         ProfileSummaryBuilder::getColdCountThreshold(
1057             (Summary->getDetailedSummary()));
1058 
1059     // Trim and merge cold context profile using cold threshold above;
1060     SampleContextTrimmer(ProfileMap)
1061         .trimAndMergeColdContextProfiles(
1062             SampleProfColdThreshold, SampleTrimColdContext,
1063             SampleMergeColdContext, SampleColdContextFrameDepth, false);
1064   }
1065 
1066   if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1067     ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
1068     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1069   } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1070     ProfileConverter CSConverter(ProfileMap);
1071     CSConverter.convertCSProfiles();
1072     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1073   }
1074 
1075   auto WriterOrErr =
1076       SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
1077   if (std::error_code EC = WriterOrErr.getError())
1078     exitWithErrorCode(EC, OutputFilename);
1079 
1080   auto Writer = std::move(WriterOrErr.get());
1081   // WriterList will have StringRef refering to string in Buffer.
1082   // Make sure Buffer lives as long as WriterList.
1083   auto Buffer = getInputFileBuf(ProfileSymbolListFile);
1084   handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
1085                         CompressAllSections, UseMD5, GenPartialProfile);
1086 
1087   // If OutputSizeLimit is 0 (default), it is the same as write().
1088   if (std::error_code EC =
1089           Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1090     exitWithErrorCode(std::move(EC));
1091 }
1092 
1093 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1094   StringRef WeightStr, FileName;
1095   std::tie(WeightStr, FileName) = WeightedFilename.split(',');
1096 
1097   uint64_t Weight;
1098   if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
1099     exitWithError("input weight must be a positive integer");
1100 
1101   return {std::string(FileName), Weight};
1102 }
1103 
1104 static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1105   StringRef Filename = WF.Filename;
1106   uint64_t Weight = WF.Weight;
1107 
1108   // If it's STDIN just pass it on.
1109   if (Filename == "-") {
1110     WNI.push_back({std::string(Filename), Weight});
1111     return;
1112   }
1113 
1114   llvm::sys::fs::file_status Status;
1115   llvm::sys::fs::status(Filename, Status);
1116   if (!llvm::sys::fs::exists(Status))
1117     exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
1118                       Filename);
1119   // If it's a source file, collect it.
1120   if (llvm::sys::fs::is_regular_file(Status)) {
1121     WNI.push_back({std::string(Filename), Weight});
1122     return;
1123   }
1124 
1125   if (llvm::sys::fs::is_directory(Status)) {
1126     std::error_code EC;
1127     for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1128          F != E && !EC; F.increment(EC)) {
1129       if (llvm::sys::fs::is_regular_file(F->path())) {
1130         addWeightedInput(WNI, {F->path(), Weight});
1131       }
1132     }
1133     if (EC)
1134       exitWithErrorCode(EC, Filename);
1135   }
1136 }
1137 
1138 static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1139                                     WeightedFileVector &WFV) {
1140   if (!Buffer)
1141     return;
1142 
1143   SmallVector<StringRef, 8> Entries;
1144   StringRef Data = Buffer->getBuffer();
1145   Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1146   for (const StringRef &FileWeightEntry : Entries) {
1147     StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
1148     // Skip comments.
1149     if (SanitizedEntry.startswith("#"))
1150       continue;
1151     // If there's no comma, it's an unweighted profile.
1152     else if (!SanitizedEntry.contains(','))
1153       addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
1154     else
1155       addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
1156   }
1157 }
1158 
1159 static int merge_main(int argc, const char *argv[]) {
1160   cl::list<std::string> InputFilenames(cl::Positional,
1161                                        cl::desc("<filename...>"));
1162   cl::list<std::string> WeightedInputFilenames("weighted-input",
1163                                                cl::desc("<weight>,<filename>"));
1164   cl::opt<std::string> InputFilenamesFile(
1165       "input-files", cl::init(""),
1166       cl::desc("Path to file containing newline-separated "
1167                "[<weight>,]<filename> entries"));
1168   cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
1169                                 cl::aliasopt(InputFilenamesFile));
1170   cl::opt<bool> DumpInputFileList(
1171       "dump-input-file-list", cl::init(false), cl::Hidden,
1172       cl::desc("Dump the list of input files and their weights, then exit"));
1173   cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
1174                                      cl::desc("Symbol remapping file"));
1175   cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
1176                            cl::aliasopt(RemappingFile));
1177   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
1178                                       cl::init("-"), cl::desc("Output file"));
1179   cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
1180                             cl::aliasopt(OutputFilename));
1181   cl::opt<ProfileKinds> ProfileKind(
1182       cl::desc("Profile kind:"), cl::init(instr),
1183       cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
1184                  clEnumVal(sample, "Sample profile")));
1185   cl::opt<ProfileFormat> OutputFormat(
1186       cl::desc("Format of output profile"), cl::init(PF_Ext_Binary),
1187       cl::values(
1188           clEnumValN(PF_Binary, "binary", "Binary encoding"),
1189           clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding "
1190                      "(default)"),
1191           clEnumValN(PF_Text, "text", "Text encoding"),
1192           clEnumValN(PF_GCC, "gcc",
1193                      "GCC encoding (only meaningful for -sample)")));
1194   cl::opt<FailureMode> FailureMode(
1195       "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"),
1196       cl::values(clEnumValN(failIfAnyAreInvalid, "any",
1197                             "Fail if any profile is invalid."),
1198                  clEnumValN(failIfAllAreInvalid, "all",
1199                             "Fail only if all profiles are invalid.")));
1200   cl::opt<bool> OutputSparse("sparse", cl::init(false),
1201       cl::desc("Generate a sparse profile (only meaningful for -instr)"));
1202   cl::opt<unsigned> NumThreads(
1203       "num-threads", cl::init(0),
1204       cl::desc("Number of merge threads to use (default: autodetect)"));
1205   cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
1206                         cl::aliasopt(NumThreads));
1207   cl::opt<std::string> ProfileSymbolListFile(
1208       "prof-sym-list", cl::init(""),
1209       cl::desc("Path to file containing the list of function symbols "
1210                "used to populate profile symbol list"));
1211   cl::opt<bool> CompressAllSections(
1212       "compress-all-sections", cl::init(false), cl::Hidden,
1213       cl::desc("Compress all sections when writing the profile (only "
1214                "meaningful for -extbinary)"));
1215   cl::opt<bool> UseMD5(
1216       "use-md5", cl::init(false), cl::Hidden,
1217       cl::desc("Choose to use MD5 to represent string in name table (only "
1218                "meaningful for -extbinary)"));
1219   cl::opt<bool> SampleMergeColdContext(
1220       "sample-merge-cold-context", cl::init(false), cl::Hidden,
1221       cl::desc(
1222           "Merge context sample profiles whose count is below cold threshold"));
1223   cl::opt<bool> SampleTrimColdContext(
1224       "sample-trim-cold-context", cl::init(false), cl::Hidden,
1225       cl::desc(
1226           "Trim context sample profiles whose count is below cold threshold"));
1227   cl::opt<uint32_t> SampleColdContextFrameDepth(
1228       "sample-frame-depth-for-cold-context", cl::init(1),
1229       cl::desc("Keep the last K frames while merging cold profile. 1 means the "
1230                "context-less base profile"));
1231   cl::opt<size_t> OutputSizeLimit(
1232       "output-size-limit", cl::init(0), cl::Hidden,
1233       cl::desc("Trim cold functions until profile size is below specified "
1234                "limit in bytes. This uses a heursitic and functions may be "
1235                "excessively trimmed"));
1236   cl::opt<bool> GenPartialProfile(
1237       "gen-partial-profile", cl::init(false), cl::Hidden,
1238       cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
1239   cl::opt<std::string> SupplInstrWithSample(
1240       "supplement-instr-with-sample", cl::init(""), cl::Hidden,
1241       cl::desc("Supplement an instr profile with sample profile, to correct "
1242                "the profile unrepresentativeness issue. The sample "
1243                "profile is the input of the flag. Output will be in instr "
1244                "format (The flag only works with -instr)"));
1245   cl::opt<float> ZeroCounterThreshold(
1246       "zero-counter-threshold", cl::init(0.7), cl::Hidden,
1247       cl::desc("For the function which is cold in instr profile but hot in "
1248                "sample profile, if the ratio of the number of zero counters "
1249                "divided by the total number of counters is above the "
1250                "threshold, the profile of the function will be regarded as "
1251                "being harmful for performance and will be dropped."));
1252   cl::opt<unsigned> SupplMinSizeThreshold(
1253       "suppl-min-size-threshold", cl::init(10), cl::Hidden,
1254       cl::desc("If the size of a function is smaller than the threshold, "
1255                "assume it can be inlined by PGO early inliner and it won't "
1256                "be adjusted based on sample profile."));
1257   cl::opt<unsigned> InstrProfColdThreshold(
1258       "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
1259       cl::desc("User specified cold threshold for instr profile which will "
1260                "override the cold threshold got from profile summary. "));
1261   cl::opt<SampleProfileLayout> ProfileLayout(
1262       "convert-sample-profile-layout",
1263       cl::desc("Convert the generated profile to a profile with a new layout"),
1264       cl::init(SPL_None),
1265       cl::values(
1266           clEnumValN(SPL_Nest, "nest",
1267                      "Nested profile, the input should be CS flat profile"),
1268           clEnumValN(SPL_Flat, "flat",
1269                      "Profile with nested inlinee flatten out")));
1270   cl::opt<std::string> DebugInfoFilename(
1271       "debug-info", cl::init(""),
1272       cl::desc("Use the provided debug info to correlate the raw profile."));
1273   cl::opt<std::string> ProfiledBinary(
1274       "profiled-binary", cl::init(""),
1275       cl::desc("Path to binary from which the profile was collected."));
1276   cl::opt<bool> DropProfileSymbolList(
1277       "drop-profile-symbol-list", cl::init(false), cl::Hidden,
1278       cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
1279                "(only meaningful for -sample)"));
1280   // WARNING: This reservoir size value is propagated to any input indexed
1281   // profiles for simplicity. Changing this value between invocations could
1282   // result in sample bias.
1283   cl::opt<uint64_t> TemporalProfTraceReservoirSize(
1284       "temporal-profile-trace-reservoir-size", cl::init(100),
1285       cl::desc("The maximum number of stored temporal profile traces (default: "
1286                "100)"));
1287   cl::opt<uint64_t> TemporalProfMaxTraceLength(
1288       "temporal-profile-max-trace-length", cl::init(10000),
1289       cl::desc("The maximum length of a single temporal profile trace "
1290                "(default: 10000)"));
1291 
1292   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
1293 
1294   WeightedFileVector WeightedInputs;
1295   for (StringRef Filename : InputFilenames)
1296     addWeightedInput(WeightedInputs, {std::string(Filename), 1});
1297   for (StringRef WeightedFilename : WeightedInputFilenames)
1298     addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename));
1299 
1300   // Make sure that the file buffer stays alive for the duration of the
1301   // weighted input vector's lifetime.
1302   auto Buffer = getInputFileBuf(InputFilenamesFile);
1303   parseInputFilenamesFile(Buffer.get(), WeightedInputs);
1304 
1305   if (WeightedInputs.empty())
1306     exitWithError("no input files specified. See " +
1307                   sys::path::filename(argv[0]) + " -help");
1308 
1309   if (DumpInputFileList) {
1310     for (auto &WF : WeightedInputs)
1311       outs() << WF.Weight << "," << WF.Filename << "\n";
1312     return 0;
1313   }
1314 
1315   std::unique_ptr<SymbolRemapper> Remapper;
1316   if (!RemappingFile.empty())
1317     Remapper = SymbolRemapper::create(RemappingFile);
1318 
1319   if (!SupplInstrWithSample.empty()) {
1320     if (ProfileKind != instr)
1321       exitWithError(
1322           "-supplement-instr-with-sample can only work with -instr. ");
1323 
1324     supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename,
1325                            OutputFormat, OutputSparse, SupplMinSizeThreshold,
1326                            ZeroCounterThreshold, InstrProfColdThreshold);
1327     return 0;
1328   }
1329 
1330   if (ProfileKind == instr)
1331     mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(),
1332                       OutputFilename, OutputFormat,
1333                       TemporalProfTraceReservoirSize,
1334                       TemporalProfMaxTraceLength, OutputSparse, NumThreads,
1335                       FailureMode, ProfiledBinary);
1336   else
1337     mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
1338                        OutputFormat, ProfileSymbolListFile, CompressAllSections,
1339                        UseMD5, GenPartialProfile, ProfileLayout,
1340                        SampleMergeColdContext, SampleTrimColdContext,
1341                        SampleColdContextFrameDepth, FailureMode,
1342                        DropProfileSymbolList, OutputSizeLimit);
1343   return 0;
1344 }
1345 
1346 /// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1347 static void overlapInstrProfile(const std::string &BaseFilename,
1348                                 const std::string &TestFilename,
1349                                 const OverlapFuncFilters &FuncFilter,
1350                                 raw_fd_ostream &OS, bool IsCS) {
1351   std::mutex ErrorLock;
1352   SmallSet<instrprof_error, 4> WriterErrorCodes;
1353   WriterContext Context(false, ErrorLock, WriterErrorCodes);
1354   WeightedFile WeightedInput{BaseFilename, 1};
1355   OverlapStats Overlap;
1356   Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1357   if (E)
1358     exitWithError(std::move(E), "error in getting profile count sums");
1359   if (Overlap.Base.CountSum < 1.0f) {
1360     OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1361     exit(0);
1362   }
1363   if (Overlap.Test.CountSum < 1.0f) {
1364     OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1365     exit(0);
1366   }
1367   loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
1368   overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
1369                IsCS);
1370   Overlap.dump(OS);
1371 }
1372 
1373 namespace {
1374 struct SampleOverlapStats {
1375   SampleContext BaseName;
1376   SampleContext TestName;
1377   // Number of overlap units
1378   uint64_t OverlapCount;
1379   // Total samples of overlap units
1380   uint64_t OverlapSample;
1381   // Number of and total samples of units that only present in base or test
1382   // profile
1383   uint64_t BaseUniqueCount;
1384   uint64_t BaseUniqueSample;
1385   uint64_t TestUniqueCount;
1386   uint64_t TestUniqueSample;
1387   // Number of units and total samples in base or test profile
1388   uint64_t BaseCount;
1389   uint64_t BaseSample;
1390   uint64_t TestCount;
1391   uint64_t TestSample;
1392   // Number of and total samples of units that present in at least one profile
1393   uint64_t UnionCount;
1394   uint64_t UnionSample;
1395   // Weighted similarity
1396   double Similarity;
1397   // For SampleOverlapStats instances representing functions, weights of the
1398   // function in base and test profiles
1399   double BaseWeight;
1400   double TestWeight;
1401 
1402   SampleOverlapStats()
1403       : OverlapCount(0), OverlapSample(0), BaseUniqueCount(0),
1404         BaseUniqueSample(0), TestUniqueCount(0), TestUniqueSample(0),
1405         BaseCount(0), BaseSample(0), TestCount(0), TestSample(0), UnionCount(0),
1406         UnionSample(0), Similarity(0.0), BaseWeight(0.0), TestWeight(0.0) {}
1407 };
1408 } // end anonymous namespace
1409 
namespace {
/// Per-function sample summary: the sum of its samples, the largest single
/// sample and the number of hot blocks. A default-constructed instance has
/// all three set to zero.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  FuncSampleStats() = default;

  /// Initialize every field from the like-named argument.
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum{SampleSum}, MaxSample{MaxSample},
        HotBlockCount{HotBlockCount} {}
};
} // end anonymous namespace
1422 
namespace {
/// Classification of the current position while walking two sorted maps in
/// lock-step: keys are equal, the key exists only in the first / only in the
/// second map, or no comparison has been made yet.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Helper that drives a merge-style walk over two sorted maps. \p T is the
/// map iterator type; its elements must expose a `first` key comparable with
/// operator<.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  /// Start at \p FirstIter / \p SecondIter; the status is MS_None until
  /// updateOneStep() has been called once.
  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd) {}

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Move past whatever the previous status consumed (nothing for MS_None,
  /// which means the walk has not started), then classify the new position.
  /// Once both ranges are exhausted the status is left unchanged.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Cursor into and end of the first container.
  T FirstIter;
  T FirstEnd;
  // Cursor into and end of the second container.
  T SecondIter;
  T SecondEnd;
  // Classification of the current cursor pair.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace

template <class T> void MatchStep<T>::updateOneStep() {
  // Consume the element(s) that the previous status referred to.
  if (Status == MS_Match || Status == MS_FirstUnique)
    ++FirstIter;
  if (Status == MS_Match || Status == MS_SecondUnique)
    ++SecondIter;

  const bool FirstDone = isFirstFinished();
  const bool SecondDone = isSecondFinished();

  // Nothing left to classify; keep the last status.
  if (FirstDone && SecondDone)
    return;

  // Only one side has elements left: they are all unique to that side.
  if (SecondDone) {
    Status = MS_FirstUnique;
    return;
  }
  if (FirstDone) {
    Status = MS_SecondUnique;
    return;
  }

  // Both sides have an element: the smaller key is unique to its side, equal
  // keys are a match.
  if (FirstIter->first < SecondIter->first)
    Status = MS_FirstUnique;
  else if (SecondIter->first < FirstIter->first)
    Status = MS_SecondUnique;
  else
    Status = MS_Match;
}
1497 
1498 // Return the sum of line/block samples, the max line/block sample, and the
1499 // number of line/block samples above the given threshold in a function
1500 // including its inlinees.
1501 static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1502                                FuncSampleStats &FuncStats,
1503                                uint64_t HotThreshold) {
1504   for (const auto &L : Func.getBodySamples()) {
1505     uint64_t Sample = L.second.getSamples();
1506     FuncStats.SampleSum += Sample;
1507     FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
1508     if (Sample >= HotThreshold)
1509       ++FuncStats.HotBlockCount;
1510   }
1511 
1512   for (const auto &C : Func.getCallsiteSamples()) {
1513     for (const auto &F : C.second)
1514       getFuncSampleStats(F.second, FuncStats, HotThreshold);
1515   }
1516 }
1517 
1518 /// Predicate that determines if a function is hot with a given threshold. We
1519 /// keep it separate from its callsites for possible extension in the future.
1520 static bool isFunctionHot(const FuncSampleStats &FuncStats,
1521                           uint64_t HotThreshold) {
1522   // We intentionally compare the maximum sample count in a function with the
1523   // HotThreshold to get an approximate determination on hot functions.
1524   return (FuncStats.MaxSample >= HotThreshold);
1525 }
1526 
1527 namespace {
1528 class SampleOverlapAggregator {
1529 public:
1530   SampleOverlapAggregator(const std::string &BaseFilename,
1531                           const std::string &TestFilename,
1532                           double LowSimilarityThreshold, double Epsilon,
1533                           const OverlapFuncFilters &FuncFilter)
1534       : BaseFilename(BaseFilename), TestFilename(TestFilename),
1535         LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
1536         FuncFilter(FuncFilter) {}
1537 
1538   /// Detect 0-sample input profile and report to output stream. This interface
1539   /// should be called after loadProfiles().
1540   bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
1541 
1542   /// Write out function-level similarity statistics for functions specified by
1543   /// options --function, --value-cutoff, and --similarity-cutoff.
1544   void dumpFuncSimilarity(raw_fd_ostream &OS) const;
1545 
1546   /// Write out program-level similarity and overlap statistics.
1547   void dumpProgramSummary(raw_fd_ostream &OS) const;
1548 
1549   /// Write out hot-function and hot-block statistics for base_profile,
1550   /// test_profile, and their overlap. For both cases, the overlap HO is
1551   /// calculated as follows:
1552   ///    Given the number of functions (or blocks) that are hot in both profiles
1553   ///    HCommon and the number of functions (or blocks) that are hot in at
1554   ///    least one profile HUnion, HO = HCommon / HUnion.
1555   void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
1556 
1557   /// This function tries matching functions in base and test profiles. For each
1558   /// pair of matched functions, it aggregates the function-level
1559   /// similarity into a profile-level similarity. It also dump function-level
1560   /// similarity information of functions specified by --function,
1561   /// --value-cutoff, and --similarity-cutoff options. The program-level
1562   /// similarity PS is computed as follows:
1563   ///     Given function-level similarity FS(A) for all function A, the
1564   ///     weight of function A in base profile WB(A), and the weight of function
1565   ///     A in test profile WT(A), compute PS(base_profile, test_profile) =
1566   ///     sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1567   ///     meaning no-overlap.
1568   void computeSampleProfileOverlap(raw_fd_ostream &OS);
1569 
1570   /// Initialize ProfOverlap with the sum of samples in base and test
1571   /// profiles. This function also computes and keeps the sum of samples and
1572   /// max sample counts of each function in BaseStats and TestStats for later
1573   /// use to avoid re-computations.
1574   void initializeSampleProfileOverlap();
1575 
1576   /// Load profiles specified by BaseFilename and TestFilename.
1577   std::error_code loadProfiles();
1578 
1579   using FuncSampleStatsMap =
1580       std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
1581 
1582 private:
1583   SampleOverlapStats ProfOverlap;
1584   SampleOverlapStats HotFuncOverlap;
1585   SampleOverlapStats HotBlockOverlap;
1586   std::string BaseFilename;
1587   std::string TestFilename;
1588   std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
1589   std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
1590   // BaseStats and TestStats hold FuncSampleStats for each function, with
1591   // function name as the key.
1592   FuncSampleStatsMap BaseStats;
1593   FuncSampleStatsMap TestStats;
1594   // Low similarity threshold in floating point number
1595   double LowSimilarityThreshold;
1596   // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1597   // for tracking hot blocks.
1598   uint64_t BaseHotThreshold;
1599   uint64_t TestHotThreshold;
1600   // A small threshold used to round the results of floating point accumulations
1601   // to resolve imprecision.
1602   const double Epsilon;
1603   std::multimap<double, SampleOverlapStats, std::greater<double>>
1604       FuncSimilarityDump;
1605   // FuncFilter carries specifications in options --value-cutoff and
1606   // --function.
1607   OverlapFuncFilters FuncFilter;
1608   // Column offsets for printing the function-level details table.
1609   static const unsigned int TestWeightCol = 15;
1610   static const unsigned int SimilarityCol = 30;
1611   static const unsigned int OverlapCol = 43;
1612   static const unsigned int BaseUniqueCol = 53;
1613   static const unsigned int TestUniqueCol = 67;
1614   static const unsigned int BaseSampleCol = 81;
1615   static const unsigned int TestSampleCol = 96;
1616   static const unsigned int FuncNameCol = 111;
1617 
1618   /// Return a similarity of two line/block sample counters in the same
1619   /// function in base and test profiles. The line/block-similarity BS(i) is
1620   /// computed as follows:
1621   ///    For an offsets i, given the sample count at i in base profile BB(i),
1622   ///    the sample count at i in test profile BT(i), the sum of sample counts
1623   ///    in this function in base profile SB, and the sum of sample counts in
1624   ///    this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1625   ///    BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
1626   double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
1627                                 const SampleOverlapStats &FuncOverlap) const;
1628 
1629   void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
1630                              uint64_t HotBlockCount);
1631 
1632   void getHotFunctions(const FuncSampleStatsMap &ProfStats,
1633                        FuncSampleStatsMap &HotFunc,
1634                        uint64_t HotThreshold) const;
1635 
1636   void computeHotFuncOverlap();
1637 
1638   /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1639   /// Difference for two sample units in a matched function according to the
1640   /// given match status.
1641   void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
1642                                      uint64_t HotBlockCount,
1643                                      SampleOverlapStats &FuncOverlap,
1644                                      double &Difference, MatchStatus Status);
1645 
1646   /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1647   /// Difference for unmatched callees that only present in one profile in a
1648   /// matched caller function.
1649   void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
1650                                 SampleOverlapStats &FuncOverlap,
1651                                 double &Difference, MatchStatus Status);
1652 
1653   /// This function updates sample overlap statistics of an overlap function in
1654   /// base and test profile. It also calculates a function-internal similarity
1655   /// FIS as follows:
1656   ///    For offsets i that have samples in at least one profile in this
1657   ///    function A, given BS(i) returned by computeBlockSimilarity(), compute
1658   ///    FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
1659   ///    0.0 meaning no overlap.
1660   double computeSampleFunctionInternalOverlap(
1661       const sampleprof::FunctionSamples &BaseFunc,
1662       const sampleprof::FunctionSamples &TestFunc,
1663       SampleOverlapStats &FuncOverlap);
1664 
1665   /// Function-level similarity (FS) is a weighted value over function internal
1666   /// similarity (FIS). This function computes a function's FS from its FIS by
1667   /// applying the weight.
1668   double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
1669                                  uint64_t TestFuncSample) const;
1670 
1671   /// The function-level similarity FS(A) for a function A is computed as
1672   /// follows:
1673   ///     Compute a function-internal similarity FIS(A) by
1674   ///     computeSampleFunctionInternalOverlap(). Then, with the weight of
1675   ///     function A in base profile WB(A), and the weight of function A in test
1676   ///     profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
1677   ///     ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
1678   double
1679   computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
1680                                const sampleprof::FunctionSamples *TestFunc,
1681                                SampleOverlapStats *FuncOverlap,
1682                                uint64_t BaseFuncSample,
1683                                uint64_t TestFuncSample);
1684 
1685   /// Profile-level similarity (PS) is a weighted aggregate over function-level
1686   /// similarities (FS). This method weights the FS value by the function
1687   /// weights in the base and test profiles for the aggregation.
1688   double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
1689                             uint64_t TestFuncSample) const;
1690 };
1691 } // end anonymous namespace
1692 
1693 bool SampleOverlapAggregator::detectZeroSampleProfile(
1694     raw_fd_ostream &OS) const {
1695   bool HaveZeroSample = false;
1696   if (ProfOverlap.BaseSample == 0) {
1697     OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
1698     HaveZeroSample = true;
1699   }
1700   if (ProfOverlap.TestSample == 0) {
1701     OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
1702     HaveZeroSample = true;
1703   }
1704   return HaveZeroSample;
1705 }
1706 
1707 double SampleOverlapAggregator::computeBlockSimilarity(
1708     uint64_t BaseSample, uint64_t TestSample,
1709     const SampleOverlapStats &FuncOverlap) const {
1710   double BaseFrac = 0.0;
1711   double TestFrac = 0.0;
1712   if (FuncOverlap.BaseSample > 0)
1713     BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
1714   if (FuncOverlap.TestSample > 0)
1715     TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
1716   return 1.0 - std::fabs(BaseFrac - TestFrac);
1717 }
1718 
1719 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
1720                                                     uint64_t TestSample,
1721                                                     uint64_t HotBlockCount) {
1722   bool IsBaseHot = (BaseSample >= BaseHotThreshold);
1723   bool IsTestHot = (TestSample >= TestHotThreshold);
1724   if (!IsBaseHot && !IsTestHot)
1725     return;
1726 
1727   HotBlockOverlap.UnionCount += HotBlockCount;
1728   if (IsBaseHot)
1729     HotBlockOverlap.BaseCount += HotBlockCount;
1730   if (IsTestHot)
1731     HotBlockOverlap.TestCount += HotBlockCount;
1732   if (IsBaseHot && IsTestHot)
1733     HotBlockOverlap.OverlapCount += HotBlockCount;
1734 }
1735 
1736 void SampleOverlapAggregator::getHotFunctions(
1737     const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
1738     uint64_t HotThreshold) const {
1739   for (const auto &F : ProfStats) {
1740     if (isFunctionHot(F.second, HotThreshold))
1741       HotFunc.emplace(F.first, F.second);
1742   }
1743 }
1744 
1745 void SampleOverlapAggregator::computeHotFuncOverlap() {
1746   FuncSampleStatsMap BaseHotFunc;
1747   getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
1748   HotFuncOverlap.BaseCount = BaseHotFunc.size();
1749 
1750   FuncSampleStatsMap TestHotFunc;
1751   getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
1752   HotFuncOverlap.TestCount = TestHotFunc.size();
1753   HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
1754 
1755   for (const auto &F : BaseHotFunc) {
1756     if (TestHotFunc.count(F.first))
1757       ++HotFuncOverlap.OverlapCount;
1758     else
1759       ++HotFuncOverlap.UnionCount;
1760   }
1761 }
1762 
1763 void SampleOverlapAggregator::updateOverlapStatsForFunction(
1764     uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
1765     SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
1766   assert(Status != MS_None &&
1767          "Match status should be updated before updating overlap statistics");
1768   if (Status == MS_FirstUnique) {
1769     TestSample = 0;
1770     FuncOverlap.BaseUniqueSample += BaseSample;
1771   } else if (Status == MS_SecondUnique) {
1772     BaseSample = 0;
1773     FuncOverlap.TestUniqueSample += TestSample;
1774   } else {
1775     ++FuncOverlap.OverlapCount;
1776   }
1777 
1778   FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
1779   FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
1780   Difference +=
1781       1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
1782   updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
1783 }
1784 
1785 void SampleOverlapAggregator::updateForUnmatchedCallee(
1786     const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
1787     double &Difference, MatchStatus Status) {
1788   assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
1789          "Status must be either of the two unmatched cases");
1790   FuncSampleStats FuncStats;
1791   if (Status == MS_FirstUnique) {
1792     getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
1793     updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
1794                                   FuncStats.HotBlockCount, FuncOverlap,
1795                                   Difference, Status);
1796   } else {
1797     getFuncSampleStats(Func, FuncStats, TestHotThreshold);
1798     updateOverlapStatsForFunction(0, FuncStats.SampleSum,
1799                                   FuncStats.HotBlockCount, FuncOverlap,
1800                                   Difference, Status);
1801   }
1802 }
1803 
// Compute the function-internal similarity of BaseFunc and TestFunc and
// accumulate their overlap statistics into FuncOverlap.
//
// The function walks, in order: (1) the body samples of both functions,
// (2) the callsite records of both functions, and (3) for callsites present
// in both, the inlined callees recorded at that callsite. Matched callees are
// handled by recursing into this function with the same FuncOverlap, so the
// statistics of nested inlinees are folded into the top-level function.
//
// Returns (2.0 - Difference) / 2, where Difference is the sum of
// (1.0 - block similarity) over every visited sample unit.
double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
    const sampleprof::FunctionSamples &BaseFunc,
    const sampleprof::FunctionSamples &TestFunc,
    SampleOverlapStats &FuncOverlap) {

  using namespace sampleprof;

  double Difference = 0;

  // Accumulate Difference for regular line/block samples in the function.
  // We match them through a sort-merge join algorithm because
  // FunctionSamples::getBodySamples() returns a map of sample counters ordered
  // by their offsets.
  MatchStep<BodySampleMap::const_iterator> BlockIterStep(
      BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
      TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
  // The first call only computes the initial match status; no iterator moves.
  BlockIterStep.updateOneStep();
  while (!BlockIterStep.areBothFinished()) {
    // An exhausted side must not be dereferenced; it contributes 0 samples.
    uint64_t BaseSample =
        BlockIterStep.isFirstFinished()
            ? 0
            : BlockIterStep.getFirstIter()->second.getSamples();
    uint64_t TestSample =
        BlockIterStep.isSecondFinished()
            ? 0
            : BlockIterStep.getSecondIter()->second.getSamples();
    // Each body sample counts as one block for the hot-block statistics.
    updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
                                  Difference, BlockIterStep.getMatchStatus());

    BlockIterStep.updateOneStep();
  }

  // Accumulate Difference for callsite lines in the function. We match
  // them through a sort-merge algorithm because
  // FunctionSamples::getCallsiteSamples() returns a map of callsite records
  // ordered by their offsets.
  MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
      BaseFunc.getCallsiteSamples().cbegin(),
      BaseFunc.getCallsiteSamples().cend(),
      TestFunc.getCallsiteSamples().cbegin(),
      TestFunc.getCallsiteSamples().cend());
  CallsiteIterStep.updateOneStep();
  while (!CallsiteIterStep.areBothFinished()) {
    MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
    assert(CallsiteStepStatus != MS_None &&
           "Match status should be updated before entering loop body");

    if (CallsiteStepStatus != MS_Match) {
      // The callsite exists in only one profile, so every callee inlined
      // there is unmatched.
      auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
                          ? CallsiteIterStep.getFirstIter()
                          : CallsiteIterStep.getSecondIter();
      for (const auto &F : Callsite->second)
        updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
                                 CallsiteStepStatus);
    } else {
      // There may be multiple inlinees at the same offset, so we need to try
      // matching all of them. This match is implemented through a sort-merge
      // algorithm because callsite records at the same offset are ordered by
      // function names.
      MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
          CallsiteIterStep.getFirstIter()->second.cbegin(),
          CallsiteIterStep.getFirstIter()->second.cend(),
          CallsiteIterStep.getSecondIter()->second.cbegin(),
          CallsiteIterStep.getSecondIter()->second.cend());
      CalleeIterStep.updateOneStep();
      while (!CalleeIterStep.areBothFinished()) {
        MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
        if (CalleeStepStatus != MS_Match) {
          auto Callee = (CalleeStepStatus == MS_FirstUnique)
                            ? CalleeIterStep.getFirstIter()
                            : CalleeIterStep.getSecondIter();
          updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
                                   CalleeStepStatus);
        } else {
          // An inlined function can contain other inlinees inside, so compute
          // the Difference recursively. The recursive call returns
          // (2.0 - D) / 2 for its own difference D, so 2.0 - 2 * result
          // recovers D.
          Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
                                      CalleeIterStep.getFirstIter()->second,
                                      CalleeIterStep.getSecondIter()->second,
                                      FuncOverlap);
        }
        CalleeIterStep.updateOneStep();
      }
    }
    CallsiteIterStep.updateOneStep();
  }

  // Difference reflects the total differences of line/block samples in this
  // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
  // reflect the similarity between function profiles in [0.0f to 1.0f].
  return (2.0 - Difference) / 2;
}
1896 
1897 double SampleOverlapAggregator::weightForFuncSimilarity(
1898     double FuncInternalSimilarity, uint64_t BaseFuncSample,
1899     uint64_t TestFuncSample) const {
1900   // Compute the weight as the distance between the function weights in two
1901   // profiles.
1902   double BaseFrac = 0.0;
1903   double TestFrac = 0.0;
1904   assert(ProfOverlap.BaseSample > 0 &&
1905          "Total samples in base profile should be greater than 0");
1906   BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
1907   assert(ProfOverlap.TestSample > 0 &&
1908          "Total samples in test profile should be greater than 0");
1909   TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
1910   double WeightDistance = std::fabs(BaseFrac - TestFrac);
1911 
1912   // Take WeightDistance into the similarity.
1913   return FuncInternalSimilarity * (1 - WeightDistance);
1914 }
1915 
1916 double
1917 SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
1918                                             uint64_t BaseFuncSample,
1919                                             uint64_t TestFuncSample) const {
1920 
1921   double BaseFrac = 0.0;
1922   double TestFrac = 0.0;
1923   assert(ProfOverlap.BaseSample > 0 &&
1924          "Total samples in base profile should be greater than 0");
1925   BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
1926   assert(ProfOverlap.TestSample > 0 &&
1927          "Total samples in test profile should be greater than 0");
1928   TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
1929   return FuncSimilarity * (BaseFrac + TestFrac);
1930 }
1931 
1932 double SampleOverlapAggregator::computeSampleFunctionOverlap(
1933     const sampleprof::FunctionSamples *BaseFunc,
1934     const sampleprof::FunctionSamples *TestFunc,
1935     SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
1936     uint64_t TestFuncSample) {
1937   // Default function internal similarity before weighted, meaning two functions
1938   // has no overlap.
1939   const double DefaultFuncInternalSimilarity = 0;
1940   double FuncSimilarity;
1941   double FuncInternalSimilarity;
1942 
1943   // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
1944   // In this case, we use DefaultFuncInternalSimilarity as the function internal
1945   // similarity.
1946   if (!BaseFunc || !TestFunc) {
1947     FuncInternalSimilarity = DefaultFuncInternalSimilarity;
1948   } else {
1949     assert(FuncOverlap != nullptr &&
1950            "FuncOverlap should be provided in this case");
1951     FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
1952         *BaseFunc, *TestFunc, *FuncOverlap);
1953     // Now, FuncInternalSimilarity may be a little less than 0 due to
1954     // imprecision of floating point accumulations. Make it zero if the
1955     // difference is below Epsilon.
1956     FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
1957                                  ? 0
1958                                  : FuncInternalSimilarity;
1959   }
1960   FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
1961                                            BaseFuncSample, TestFuncSample);
1962   return FuncSimilarity;
1963 }
1964 
// Match the functions of the test profile against those of the base profile by
// context and aggregate program-level statistics into ProfOverlap. Functions
// selected by the filters are recorded in FuncSimilarityDump. Finally the
// hot-function overlap is computed.
//
// This function reads BaseStats/TestStats and the ProfOverlap totals that
// initializeSampleProfileOverlap() fills in. The OS parameter is not used in
// the body.
void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
  using namespace sampleprof;

  // Index the base profile by context. Matched entries are erased below, so
  // what remains afterwards are the functions unique to the base profile.
  std::unordered_map<SampleContext, const FunctionSamples *,
                     SampleContext::Hash>
      BaseFuncProf;
  const auto &BaseProfiles = BaseReader->getProfiles();
  for (const auto &BaseFunc : BaseProfiles) {
    BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
  }
  // The union starts with every base function; test-only functions are added
  // as they are found.
  ProfOverlap.UnionCount = BaseFuncProf.size();

  const auto &TestProfiles = TestReader->getProfiles();
  for (const auto &TestFunc : TestProfiles) {
    SampleOverlapStats FuncOverlap;
    FuncOverlap.TestName = TestFunc.second.getContext();
    assert(TestStats.count(FuncOverlap.TestName) &&
           "TestStats should have records for all functions in test profile "
           "except inlinees");
    FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;

    bool Matched = false;
    const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
    if (Match == BaseFuncProf.end()) {
      // The function exists only in the test profile.
      const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
      ++ProfOverlap.TestUniqueCount;
      ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
      FuncOverlap.TestUniqueSample = FuncStats.SampleSum;

      updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);

      // With null function pointers the internal similarity defaults to 0.
      double FuncSimilarity = computeSampleFunctionOverlap(
          nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
      ProfOverlap.Similarity +=
          weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);

      ++ProfOverlap.UnionCount;
      ProfOverlap.UnionSample += FuncStats.SampleSum;
    } else {
      ++ProfOverlap.OverlapCount;

      // Two functions match with each other. Compute function-level overlap and
      // aggregate them into profile-level overlap.
      FuncOverlap.BaseName = Match->second->getContext();
      assert(BaseStats.count(FuncOverlap.BaseName) &&
             "BaseStats should have records for all functions in base profile "
             "except inlinees");
      FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;

      FuncOverlap.Similarity = computeSampleFunctionOverlap(
          Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
          FuncOverlap.TestSample);
      ProfOverlap.Similarity +=
          weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
                             FuncOverlap.TestSample);
      ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
      ProfOverlap.UnionSample += FuncOverlap.UnionSample;

      // Accumulate the base unique and test unique sample counts into
      // ProfOverlap.
      ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
      ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;

      // Remove matched base functions for later reporting functions not found
      // in test profile.
      BaseFuncProf.erase(Match);
      Matched = true;
    }

    // Record function-level similarity information if specified by options:
    // the function's max sample reaches the value cutoff, or it is a matched
    // function with low similarity, or it is a matched function whose base
    // name contains the name filter.
    assert(TestStats.count(FuncOverlap.TestName) &&
           "TestStats should have records for all functions in test profile "
           "except inlinees");
    if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
        (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
        (Matched && !FuncFilter.NameFilter.empty() &&
         FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
             std::string::npos)) {
      assert(ProfOverlap.BaseSample > 0 &&
             "Total samples in base profile should be greater than 0");
      FuncOverlap.BaseWeight =
          static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
      assert(ProfOverlap.TestSample > 0 &&
             "Total samples in test profile should be greater than 0");
      FuncOverlap.TestWeight =
          static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
      // The dump is keyed by base weight and ordered by decreasing key.
      FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
    }
  }

  // Traverse through functions in base profile but not in test profile.
  for (const auto &F : BaseFuncProf) {
    assert(BaseStats.count(F.second->getContext()) &&
           "BaseStats should have records for all functions in base profile "
           "except inlinees");
    const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
    ++ProfOverlap.BaseUniqueCount;
    ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;

    updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);

    double FuncSimilarity = computeSampleFunctionOverlap(
        nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
    ProfOverlap.Similarity +=
        weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);

    // These functions are already part of UnionCount (it was seeded with all
    // base functions), so only the sample total grows here.
    ProfOverlap.UnionSample += FuncStats.SampleSum;
  }

  // Now, ProfOverlap.Similarity may differ slightly from 1 due to imprecision
  // of floating point accumulations. Make it 1.0 if the difference is below
  // Epsilon.
  ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
                               ? 1
                               : ProfOverlap.Similarity;

  computeHotFuncOverlap();
}
2083 
2084 void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2085   const auto &BaseProf = BaseReader->getProfiles();
2086   for (const auto &I : BaseProf) {
2087     ++ProfOverlap.BaseCount;
2088     FuncSampleStats FuncStats;
2089     getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
2090     ProfOverlap.BaseSample += FuncStats.SampleSum;
2091     BaseStats.emplace(I.second.getContext(), FuncStats);
2092   }
2093 
2094   const auto &TestProf = TestReader->getProfiles();
2095   for (const auto &I : TestProf) {
2096     ++ProfOverlap.TestCount;
2097     FuncSampleStats FuncStats;
2098     getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
2099     ProfOverlap.TestSample += FuncStats.SampleSum;
2100     TestStats.emplace(I.second.getContext(), FuncStats);
2101   }
2102 
2103   ProfOverlap.BaseName = StringRef(BaseFilename);
2104   ProfOverlap.TestName = StringRef(TestFilename);
2105 }
2106 
2107 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2108   using namespace sampleprof;
2109 
2110   if (FuncSimilarityDump.empty())
2111     return;
2112 
2113   formatted_raw_ostream FOS(OS);
2114   FOS << "Function-level details:\n";
2115   FOS << "Base weight";
2116   FOS.PadToColumn(TestWeightCol);
2117   FOS << "Test weight";
2118   FOS.PadToColumn(SimilarityCol);
2119   FOS << "Similarity";
2120   FOS.PadToColumn(OverlapCol);
2121   FOS << "Overlap";
2122   FOS.PadToColumn(BaseUniqueCol);
2123   FOS << "Base unique";
2124   FOS.PadToColumn(TestUniqueCol);
2125   FOS << "Test unique";
2126   FOS.PadToColumn(BaseSampleCol);
2127   FOS << "Base samples";
2128   FOS.PadToColumn(TestSampleCol);
2129   FOS << "Test samples";
2130   FOS.PadToColumn(FuncNameCol);
2131   FOS << "Function name\n";
2132   for (const auto &F : FuncSimilarityDump) {
2133     double OverlapPercent =
2134         F.second.UnionSample > 0
2135             ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2136             : 0;
2137     double BaseUniquePercent =
2138         F.second.BaseSample > 0
2139             ? static_cast<double>(F.second.BaseUniqueSample) /
2140                   F.second.BaseSample
2141             : 0;
2142     double TestUniquePercent =
2143         F.second.TestSample > 0
2144             ? static_cast<double>(F.second.TestUniqueSample) /
2145                   F.second.TestSample
2146             : 0;
2147 
2148     FOS << format("%.2f%%", F.second.BaseWeight * 100);
2149     FOS.PadToColumn(TestWeightCol);
2150     FOS << format("%.2f%%", F.second.TestWeight * 100);
2151     FOS.PadToColumn(SimilarityCol);
2152     FOS << format("%.2f%%", F.second.Similarity * 100);
2153     FOS.PadToColumn(OverlapCol);
2154     FOS << format("%.2f%%", OverlapPercent * 100);
2155     FOS.PadToColumn(BaseUniqueCol);
2156     FOS << format("%.2f%%", BaseUniquePercent * 100);
2157     FOS.PadToColumn(TestUniqueCol);
2158     FOS << format("%.2f%%", TestUniquePercent * 100);
2159     FOS.PadToColumn(BaseSampleCol);
2160     FOS << F.second.BaseSample;
2161     FOS.PadToColumn(TestSampleCol);
2162     FOS << F.second.TestSample;
2163     FOS.PadToColumn(FuncNameCol);
2164     FOS << F.second.TestName.toString() << "\n";
2165   }
2166 }
2167 
2168 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2169   OS << "Profile overlap infomation for base_profile: "
2170      << ProfOverlap.BaseName.toString()
2171      << " and test_profile: " << ProfOverlap.TestName.toString()
2172      << "\nProgram level:\n";
2173 
2174   OS << "  Whole program profile similarity: "
2175      << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
2176 
2177   assert(ProfOverlap.UnionSample > 0 &&
2178          "Total samples in two profile should be greater than 0");
2179   double OverlapPercent =
2180       static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2181   assert(ProfOverlap.BaseSample > 0 &&
2182          "Total samples in base profile should be greater than 0");
2183   double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2184                              ProfOverlap.BaseSample;
2185   assert(ProfOverlap.TestSample > 0 &&
2186          "Total samples in test profile should be greater than 0");
2187   double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2188                              ProfOverlap.TestSample;
2189 
2190   OS << "  Whole program sample overlap: "
2191      << format("%.3f%%", OverlapPercent * 100) << "\n";
2192   OS << "    percentage of samples unique in base profile: "
2193      << format("%.3f%%", BaseUniquePercent * 100) << "\n";
2194   OS << "    percentage of samples unique in test profile: "
2195      << format("%.3f%%", TestUniquePercent * 100) << "\n";
2196   OS << "    total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2197      << "    total samples in test profile: " << ProfOverlap.TestSample << "\n";
2198 
2199   assert(ProfOverlap.UnionCount > 0 &&
2200          "There should be at least one function in two input profiles");
2201   double FuncOverlapPercent =
2202       static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2203   OS << "  Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100)
2204      << "\n";
2205   OS << "    overlap functions: " << ProfOverlap.OverlapCount << "\n";
2206   OS << "    functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2207      << "\n";
2208   OS << "    functions unique in test profile: " << ProfOverlap.TestUniqueCount
2209      << "\n";
2210 }
2211 
2212 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2213     raw_fd_ostream &OS) const {
2214   assert(HotFuncOverlap.UnionCount > 0 &&
2215          "There should be at least one hot function in two input profiles");
2216   OS << "  Hot-function overlap: "
2217      << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
2218                              HotFuncOverlap.UnionCount * 100)
2219      << "\n";
2220   OS << "    overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2221   OS << "    hot functions unique in base profile: "
2222      << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2223   OS << "    hot functions unique in test profile: "
2224      << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2225 
2226   assert(HotBlockOverlap.UnionCount > 0 &&
2227          "There should be at least one hot block in two input profiles");
2228   OS << "  Hot-block overlap: "
2229      << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
2230                              HotBlockOverlap.UnionCount * 100)
2231      << "\n";
2232   OS << "    overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2233   OS << "    hot blocks unique in base profile: "
2234      << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2235   OS << "    hot blocks unique in test profile: "
2236      << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2237 }
2238 
2239 std::error_code SampleOverlapAggregator::loadProfiles() {
2240   using namespace sampleprof;
2241 
2242   LLVMContext Context;
2243   auto FS = vfs::getRealFileSystem();
2244   auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS,
2245                                                      FSDiscriminatorPassOption);
2246   if (std::error_code EC = BaseReaderOrErr.getError())
2247     exitWithErrorCode(EC, BaseFilename);
2248 
2249   auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS,
2250                                                      FSDiscriminatorPassOption);
2251   if (std::error_code EC = TestReaderOrErr.getError())
2252     exitWithErrorCode(EC, TestFilename);
2253 
2254   BaseReader = std::move(BaseReaderOrErr.get());
2255   TestReader = std::move(TestReaderOrErr.get());
2256 
2257   if (std::error_code EC = BaseReader->read())
2258     exitWithErrorCode(EC, BaseFilename);
2259   if (std::error_code EC = TestReader->read())
2260     exitWithErrorCode(EC, TestFilename);
2261   if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2262     exitWithError(
2263         "cannot compare probe-based profile with non-probe-based profile");
2264   if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2265     exitWithError("cannot compare CS profile with non-CS profile");
2266 
2267   // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2268   // profile summary.
2269   ProfileSummary &BasePS = BaseReader->getSummary();
2270   ProfileSummary &TestPS = TestReader->getSummary();
2271   BaseHotThreshold =
2272       ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
2273   TestHotThreshold =
2274       ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
2275 
2276   return std::error_code();
2277 }
2278 
2279 void overlapSampleProfile(const std::string &BaseFilename,
2280                           const std::string &TestFilename,
2281                           const OverlapFuncFilters &FuncFilter,
2282                           uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2283   using namespace sampleprof;
2284 
2285   // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2286   // report 2--3 places after decimal point in percentage numbers.
2287   SampleOverlapAggregator OverlapAggr(
2288       BaseFilename, TestFilename,
2289       static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2290   if (std::error_code EC = OverlapAggr.loadProfiles())
2291     exitWithErrorCode(EC);
2292 
2293   OverlapAggr.initializeSampleProfileOverlap();
2294   if (OverlapAggr.detectZeroSampleProfile(OS))
2295     return;
2296 
2297   OverlapAggr.computeSampleProfileOverlap(OS);
2298 
2299   OverlapAggr.dumpProgramSummary(OS);
2300   OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2301   OverlapAggr.dumpFuncSimilarity(OS);
2302 }
2303 
2304 static int overlap_main(int argc, const char *argv[]) {
2305   cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
2306                                     cl::desc("<base profile file>"));
2307   cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
2308                                     cl::desc("<test profile file>"));
2309   cl::opt<std::string> Output("output", cl::value_desc("output"), cl::init("-"),
2310                               cl::desc("Output file"));
2311   cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output));
2312   cl::opt<bool> IsCS(
2313       "cs", cl::init(false),
2314       cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."));
2315   cl::opt<unsigned long long> ValueCutoff(
2316       "value-cutoff", cl::init(-1),
2317       cl::desc(
2318           "Function level overlap information for every function (with calling "
2319           "context for csspgo) in test "
2320           "profile with max count value greater then the parameter value"));
2321   cl::opt<std::string> FuncNameFilter(
2322       "function",
2323       cl::desc("Function level overlap information for matching functions. For "
2324                "CSSPGO this takes a a function name with calling context"));
2325   cl::opt<unsigned long long> SimilarityCutoff(
2326       "similarity-cutoff", cl::init(0),
2327       cl::desc("For sample profiles, list function names (with calling context "
2328                "for csspgo) for overlapped functions "
2329                "with similarities below the cutoff (percentage times 10000)."));
2330   cl::opt<ProfileKinds> ProfileKind(
2331       cl::desc("Profile kind:"), cl::init(instr),
2332       cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
2333                  clEnumVal(sample, "Sample profile")));
2334   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n");
2335 
2336   std::error_code EC;
2337   raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_TextWithCRLF);
2338   if (EC)
2339     exitWithErrorCode(EC, Output);
2340 
2341   if (ProfileKind == instr)
2342     overlapInstrProfile(BaseFilename, TestFilename,
2343                         OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS,
2344                         IsCS);
2345   else
2346     overlapSampleProfile(BaseFilename, TestFilename,
2347                          OverlapFuncFilters{ValueCutoff, FuncNameFilter},
2348                          SimilarityCutoff, OS);
2349 
2350   return 0;
2351 }
2352 
namespace {
/// Running totals collected while walking the value sites of one value kind
/// (see traverseAllValueSites). All counters start at zero and the histogram
/// starts empty.
struct ValueSitesStats {
  ValueSitesStats() = default;

  // Number of value sites visited.
  uint64_t TotalNumValueSites = 0;
  // Number of visited sites that had at least one recorded value.
  uint64_t TotalNumValueSitesWithValueProfile = 0;
  // Sum of the number of recorded values over all visited sites.
  uint64_t TotalNumValues = 0;
  // Entry I is the number of sites that recorded exactly I + 1 values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2364 
2365 static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2366                                   ValueSitesStats &Stats, raw_fd_ostream &OS,
2367                                   InstrProfSymtab *Symtab) {
2368   uint32_t NS = Func.getNumValueSites(VK);
2369   Stats.TotalNumValueSites += NS;
2370   for (size_t I = 0; I < NS; ++I) {
2371     uint32_t NV = Func.getNumValueDataForSite(VK, I);
2372     std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, I);
2373     Stats.TotalNumValues += NV;
2374     if (NV) {
2375       Stats.TotalNumValueSitesWithValueProfile++;
2376       if (NV > Stats.ValueSitesHistogram.size())
2377         Stats.ValueSitesHistogram.resize(NV, 0);
2378       Stats.ValueSitesHistogram[NV - 1]++;
2379     }
2380 
2381     uint64_t SiteSum = 0;
2382     for (uint32_t V = 0; V < NV; V++)
2383       SiteSum += VD[V].Count;
2384     if (SiteSum == 0)
2385       SiteSum = 1;
2386 
2387     for (uint32_t V = 0; V < NV; V++) {
2388       OS << "\t[ " << format("%2u", I) << ", ";
2389       if (Symtab == nullptr)
2390         OS << format("%4" PRIu64, VD[V].Value);
2391       else
2392         OS << Symtab->getFuncName(VD[V].Value);
2393       OS << ", " << format("%10" PRId64, VD[V].Count) << " ] ("
2394          << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n";
2395     }
2396   }
2397 }
2398 
2399 static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2400                                 ValueSitesStats &Stats) {
2401   OS << "  Total number of sites: " << Stats.TotalNumValueSites << "\n";
2402   OS << "  Total number of sites with values: "
2403      << Stats.TotalNumValueSitesWithValueProfile << "\n";
2404   OS << "  Total number of profiled values: " << Stats.TotalNumValues << "\n";
2405 
2406   OS << "  Value sites histogram:\n\tNumTargets, SiteCount\n";
2407   for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2408     if (Stats.ValueSitesHistogram[I] > 0)
2409       OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2410   }
2411 }
2412 
/// Print the contents of the instrumentation profile \p Filename to \p OS.
///
/// Exits with an error for JSON or YAML output, if the reader cannot be
/// created, or if the reader reports an error after iteration. Depending on
/// the flags the function prints per-function counters, value-profile
/// details, a list of the hottest functions, summary statistics, binary ids,
/// the profile version and temporal profile traces.
///
/// Notable flag interactions visible below:
///  - \p TextFormat: matching functions are written in text form and nothing
///    else is printed for them; the function returns before any summary.
///  - \p ShowCovered: only the names of functions with a non-zero counter are
///    printed; the function returns before any summary.
///  - \p OnlyListBelow: only functions whose maximum counter is below
///    \p ValueCutoff are listed.
///  - \p ShowCS: for IR-level profiles, only records whose context-sensitive
///    flag equals this value are considered.
///
/// Returns 0 on every non-exiting path.
static int showInstrProfile(
    const std::string &Filename, bool ShowCounts, uint32_t TopN,
    bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary,
    std::vector<uint32_t> DetailedSummaryCutoffs, bool ShowAllFunctions,
    bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow,
    const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds,
    bool ShowCovered, bool ShowProfileVersion, bool ShowTemporalProfTraces,
    ShowFormat SFormat, raw_fd_ostream &OS) {
  if (SFormat == ShowFormat::Json)
    exitWithError("JSON output is not supported for instr profiles");
  if (SFormat == ShowFormat::Yaml)
    exitWithError("YAML output is not supported for instr profiles");
  auto FS = vfs::getRealFileSystem();
  auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
  // Fall back to the default cutoffs when a detailed summary was requested
  // without explicit cutoffs.
  std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
  if (ShowDetailedSummary && Cutoffs.empty()) {
    Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
  }
  InstrProfSummaryBuilder Builder(std::move(Cutoffs));
  if (Error E = ReaderOrErr.takeError())
    exitWithError(std::move(E), Filename);

  auto Reader = std::move(ReaderOrErr.get());
  bool IsIRInstr = Reader->isIRLevelProfile();
  size_t ShownFunctions = 0;
  size_t BelowCutoffFunctions = 0;
  // One statistics slot per value-profile kind.
  int NumVPKind = IPVK_Last - IPVK_First + 1;
  std::vector<ValueSitesStats> VPStats(NumVPKind);

  // Orders by descending count, which turns the priority queue below into a
  // min-heap: top() is the entry with the smallest count.
  auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
                   const std::pair<std::string, uint64_t> &v2) {
    return v1.second > v2.second;
  };

  // Holds at most TopN (name, max count) pairs: the hottest functions so far.
  std::priority_queue<std::pair<std::string, uint64_t>,
                      std::vector<std::pair<std::string, uint64_t>>,
                      decltype(MinCmp)>
      HottestFuncs(MinCmp);

  if (!TextFormat && OnlyListBelow) {
    OS << "The list of functions with the maximum counter less than "
       << ValueCutoff << ":\n";
  }

  // Add marker so that IR-level instrumentation round-trips properly.
  if (TextFormat && IsIRInstr)
    OS << ":ir\n";

  for (const auto &Func : *Reader) {
    // For IR-level profiles, keep only records whose CS flag matches ShowCS.
    if (Reader->isIRLevelProfile()) {
      bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
      if (FuncIsCS != ShowCS)
        continue;
    }
    // A function is shown when all functions were requested or when its name
    // contains the (non-empty) ShowFunction string.
    bool Show = ShowAllFunctions ||
                (!ShowFunction.empty() && Func.Name.contains(ShowFunction));

    bool doTextFormatDump = (Show && TextFormat);

    // Text dump: write the record and skip all further processing for it
    // (it is not added to the summary builder).
    if (doTextFormatDump) {
      InstrProfSymtab &Symtab = Reader->getSymtab();
      InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab,
                                         OS);
      continue;
    }

    assert(Func.Counts.size() > 0 && "function missing entry counter");
    Builder.addRecord(Func);

    // Coverage mode: print the name if any counter is non-zero, nothing else.
    if (ShowCovered) {
      if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; }))
        OS << Func.Name << "\n";
      continue;
    }

    uint64_t FuncMax = 0;
    uint64_t FuncSum = 0;

    // Pseudo-count records get a short dedicated printout and are excluded
    // from the cutoff and top-N handling below.
    auto PseudoKind = Func.getCountPseudoKind();
    if (PseudoKind != InstrProfRecord::NotPseudo) {
      if (Show) {
        if (!ShownFunctions)
          OS << "Counters:\n";
        ++ShownFunctions;
        OS << "  " << Func.Name << ":\n"
           << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
           << "    Counters: " << Func.Counts.size();
        if (PseudoKind == InstrProfRecord::PseudoHot)
          OS << "    <PseudoHot>\n";
        else if (PseudoKind == InstrProfRecord::PseudoWarm)
          OS << "    <PseudoWarm>\n";
        else
          llvm_unreachable("Unknown PseudoKind");
      }
      continue;
    }

    // Maximum and sum over all counters of this function.
    for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
      FuncMax = std::max(FuncMax, Func.Counts[I]);
      FuncSum += Func.Counts[I];
    }

    // Functions below the cutoff are counted (and listed when OnlyListBelow
    // is set) but never shown in detail. With OnlyListBelow, functions at or
    // above the cutoff are skipped entirely.
    if (FuncMax < ValueCutoff) {
      ++BelowCutoffFunctions;
      if (OnlyListBelow) {
        OS << "  " << Func.Name << ": (Max = " << FuncMax
           << " Sum = " << FuncSum << ")\n";
      }
      continue;
    } else if (OnlyListBelow)
      continue;

    // Maintain the TopN hottest functions: once the heap is full, replace its
    // coldest entry only when the current function is strictly hotter.
    if (TopN) {
      if (HottestFuncs.size() == TopN) {
        if (HottestFuncs.top().second < FuncMax) {
          HottestFuncs.pop();
          HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
        }
      } else
        HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
    }

    if (Show) {
      if (!ShownFunctions)
        OS << "Counters:\n";

      ++ShownFunctions;

      OS << "  " << Func.Name << ":\n"
         << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
         << "    Counters: " << Func.Counts.size() << "\n";
      // For non-IR profiles the first counter is reported as the function
      // count.
      if (!IsIRInstr)
        OS << "    Function count: " << Func.Counts[0] << "\n";

      if (ShowIndirectCallTargets)
        OS << "    Indirect Call Site Count: "
           << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";

      uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
      if (ShowMemOPSizes && NumMemOPCalls > 0)
        OS << "    Number of Memory Intrinsics Calls: " << NumMemOPCalls
           << "\n";

      if (ShowCounts) {
        OS << "    Block counts: [";
        // For non-IR profiles the first counter was already printed above as
        // the function count, so block counts start at index 1.
        size_t Start = (IsIRInstr ? 0 : 1);
        for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
          OS << (I == Start ? "" : ", ") << Func.Counts[I];
        }
        OS << "]\n";
      }

      if (ShowIndirectCallTargets) {
        OS << "    Indirect Target Results:\n";
        traverseAllValueSites(Func, IPVK_IndirectCallTarget,
                              VPStats[IPVK_IndirectCallTarget], OS,
                              &(Reader->getSymtab()));
      }

      if (ShowMemOPSizes && NumMemOPCalls > 0) {
        OS << "    Memory Intrinsic Size Results:\n";
        traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
                              nullptr);
      }
    }
  }
  if (Reader->hasError())
    exitWithError(Reader->getError(), Filename);

  // Text and coverage modes print no summary.
  if (TextFormat || ShowCovered)
    return 0;
  std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
  bool IsIR = Reader->isIRLevelProfile();
  OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
  if (IsIR)
    OS << "  entry_first = " << Reader->instrEntryBBEnabled();
  OS << "\n";
  if (ShowAllFunctions || !ShowFunction.empty())
    OS << "Functions shown: " << ShownFunctions << "\n";
  OS << "Total functions: " << PS->getNumFunctions() << "\n";
  if (ValueCutoff > 0) {
    OS << "Number of functions with maximum count (< " << ValueCutoff
       << "): " << BelowCutoffFunctions << "\n";
    OS << "Number of functions with maximum count (>= " << ValueCutoff
       << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
  }
  OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
  OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";

  if (TopN) {
    // Popping the min-heap yields ascending counts; the list is printed in
    // reverse so the hottest function comes first.
    std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
    while (!HottestFuncs.empty()) {
      SortedHottestFuncs.emplace_back(HottestFuncs.top());
      HottestFuncs.pop();
    }
    OS << "Top " << TopN
       << " functions with the largest internal block counts: \n";
    for (auto &hotfunc : llvm::reverse(SortedHottestFuncs))
      OS << "  " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
  }

  if (ShownFunctions && ShowIndirectCallTargets) {
    OS << "Statistics for indirect call sites profile:\n";
    showValueSitesStats(OS, IPVK_IndirectCallTarget,
                        VPStats[IPVK_IndirectCallTarget]);
  }

  if (ShownFunctions && ShowMemOPSizes) {
    OS << "Statistics for memory intrinsic calls sizes profile:\n";
    showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
  }

  if (ShowDetailedSummary) {
    OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
    OS << "Total count: " << PS->getTotalCount() << "\n";
    PS->printDetailedSummary(OS);
  }

  if (ShowBinaryIds)
    if (Error E = Reader->printBinaryIds(OS))
      exitWithError(std::move(E), Filename);

  if (ShowProfileVersion)
    OS << "Profile version: " << Reader->getVersion() << "\n";

  if (ShowTemporalProfTraces) {
    auto &Traces = Reader->getTemporalProfTraces();
    OS << "Temporal Profile Traces (samples=" << Traces.size()
       << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
    for (unsigned i = 0; i < Traces.size(); i++) {
      OS << "  Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
         << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
      for (auto &NameRef : Traces[i].FunctionNameRefs)
        OS << "    " << Reader->getSymtab().getFuncName(NameRef) << "\n";
    }
  }

  return 0;
}
2652 
2653 static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
2654                             raw_fd_ostream &OS) {
2655   if (!Reader->dumpSectionInfo(OS)) {
2656     WithColor::warning() << "-show-sec-info-only is only supported for "
2657                          << "sample profile in extbinary format and is "
2658                          << "ignored for other formats.\n";
2659     return;
2660   }
2661 }
2662 
2663 namespace {
2664 struct HotFuncInfo {
2665   std::string FuncName;
2666   uint64_t TotalCount;
2667   double TotalCountPercent;
2668   uint64_t MaxCount;
2669   uint64_t EntryCount;
2670 
2671   HotFuncInfo()
2672       : TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), EntryCount(0) {}
2673 
2674   HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
2675       : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
2676         MaxCount(MS), EntryCount(ES) {}
2677 };
2678 } // namespace
2679 
2680 // Print out detailed information about hot functions in PrintValues vector.
2681 // Users specify titles and offset of every columns through ColumnTitle and
2682 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
2683 // and at least 4. Besides, users can optionally give a HotFuncMetric string to
2684 // print out or let it be an empty string.
2685 static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
2686                                 const std::vector<int> &ColumnOffset,
2687                                 const std::vector<HotFuncInfo> &PrintValues,
2688                                 uint64_t HotFuncCount, uint64_t TotalFuncCount,
2689                                 uint64_t HotProfCount, uint64_t TotalProfCount,
2690                                 const std::string &HotFuncMetric,
2691                                 uint32_t TopNFunctions, raw_fd_ostream &OS) {
2692   assert(ColumnOffset.size() == ColumnTitle.size() &&
2693          "ColumnOffset and ColumnTitle should have the same size");
2694   assert(ColumnTitle.size() >= 4 &&
2695          "ColumnTitle should have at least 4 elements");
2696   assert(TotalFuncCount > 0 &&
2697          "There should be at least one function in the profile");
2698   double TotalProfPercent = 0;
2699   if (TotalProfCount > 0)
2700     TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
2701 
2702   formatted_raw_ostream FOS(OS);
2703   FOS << HotFuncCount << " out of " << TotalFuncCount
2704       << " functions with profile ("
2705       << format("%.2f%%",
2706                 (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
2707       << ") are considered hot functions";
2708   if (!HotFuncMetric.empty())
2709     FOS << " (" << HotFuncMetric << ")";
2710   FOS << ".\n";
2711   FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
2712       << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n";
2713 
2714   for (size_t I = 0; I < ColumnTitle.size(); ++I) {
2715     FOS.PadToColumn(ColumnOffset[I]);
2716     FOS << ColumnTitle[I];
2717   }
2718   FOS << "\n";
2719 
2720   uint32_t Count = 0;
2721   for (const auto &R : PrintValues) {
2722     if (TopNFunctions && (Count++ == TopNFunctions))
2723       break;
2724     FOS.PadToColumn(ColumnOffset[0]);
2725     FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
2726     FOS.PadToColumn(ColumnOffset[1]);
2727     FOS << R.MaxCount;
2728     FOS.PadToColumn(ColumnOffset[2]);
2729     FOS << R.EntryCount;
2730     FOS.PadToColumn(ColumnOffset[3]);
2731     FOS << R.FuncName << "\n";
2732   }
2733 }
2734 
2735 static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
2736                                ProfileSummary &PS, uint32_t TopN,
2737                                raw_fd_ostream &OS) {
2738   using namespace sampleprof;
2739 
2740   const uint32_t HotFuncCutoff = 990000;
2741   auto &SummaryVector = PS.getDetailedSummary();
2742   uint64_t MinCountThreshold = 0;
2743   for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
2744     if (SummaryEntry.Cutoff == HotFuncCutoff) {
2745       MinCountThreshold = SummaryEntry.MinCount;
2746       break;
2747     }
2748   }
2749 
2750   // Traverse all functions in the profile and keep only hot functions.
2751   // The following loop also calculates the sum of total samples of all
2752   // functions.
2753   std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
2754                 std::greater<uint64_t>>
2755       HotFunc;
2756   uint64_t ProfileTotalSample = 0;
2757   uint64_t HotFuncSample = 0;
2758   uint64_t HotFuncCount = 0;
2759 
2760   for (const auto &I : Profiles) {
2761     FuncSampleStats FuncStats;
2762     const FunctionSamples &FuncProf = I.second;
2763     ProfileTotalSample += FuncProf.getTotalSamples();
2764     getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold);
2765 
2766     if (isFunctionHot(FuncStats, MinCountThreshold)) {
2767       HotFunc.emplace(FuncProf.getTotalSamples(),
2768                       std::make_pair(&(I.second), FuncStats.MaxSample));
2769       HotFuncSample += FuncProf.getTotalSamples();
2770       ++HotFuncCount;
2771     }
2772   }
2773 
2774   std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
2775                                        "Entry sample", "Function name"};
2776   std::vector<int> ColumnOffset{0, 24, 42, 58};
2777   std::string Metric =
2778       std::string("max sample >= ") + std::to_string(MinCountThreshold);
2779   std::vector<HotFuncInfo> PrintValues;
2780   for (const auto &FuncPair : HotFunc) {
2781     const FunctionSamples &Func = *FuncPair.second.first;
2782     double TotalSamplePercent =
2783         (ProfileTotalSample > 0)
2784             ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
2785             : 0;
2786     PrintValues.emplace_back(
2787         HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
2788                     TotalSamplePercent, FuncPair.second.second,
2789                     Func.getHeadSamplesEstimate()));
2790   }
2791   dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
2792                       Profiles.size(), HotFuncSample, ProfileTotalSample,
2793                       Metric, TopN, OS);
2794 
2795   return 0;
2796 }
2797 
2798 static int showSampleProfile(const std::string &Filename, bool ShowCounts,
2799                              uint32_t TopN, bool ShowAllFunctions,
2800                              bool ShowDetailedSummary,
2801                              const std::string &ShowFunction,
2802                              bool ShowProfileSymbolList,
2803                              bool ShowSectionInfoOnly, bool ShowHotFuncList,
2804                              ShowFormat SFormat, raw_fd_ostream &OS) {
2805   if (SFormat == ShowFormat::Yaml)
2806     exitWithError("YAML output is not supported for sample profiles");
2807   using namespace sampleprof;
2808   LLVMContext Context;
2809   auto FS = vfs::getRealFileSystem();
2810   auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS,
2811                                                  FSDiscriminatorPassOption);
2812   if (std::error_code EC = ReaderOrErr.getError())
2813     exitWithErrorCode(EC, Filename);
2814 
2815   auto Reader = std::move(ReaderOrErr.get());
2816   if (ShowSectionInfoOnly) {
2817     showSectionInfo(Reader.get(), OS);
2818     return 0;
2819   }
2820 
2821   if (std::error_code EC = Reader->read())
2822     exitWithErrorCode(EC, Filename);
2823 
2824   if (ShowAllFunctions || ShowFunction.empty()) {
2825     if (SFormat == ShowFormat::Json)
2826       Reader->dumpJson(OS);
2827     else
2828       Reader->dump(OS);
2829   } else {
2830     if (SFormat == ShowFormat::Json)
2831       exitWithError(
2832           "the JSON format is supported only when all functions are to "
2833           "be printed");
2834 
2835     // TODO: parse context string to support filtering by contexts.
2836     Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
2837   }
2838 
2839   if (ShowProfileSymbolList) {
2840     std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
2841         Reader->getProfileSymbolList();
2842     ReaderList->dump(OS);
2843   }
2844 
2845   if (ShowDetailedSummary) {
2846     auto &PS = Reader->getSummary();
2847     PS.printSummary(OS);
2848     PS.printDetailedSummary(OS);
2849   }
2850 
2851   if (ShowHotFuncList || TopN)
2852     showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), TopN, OS);
2853 
2854   return 0;
2855 }
2856 
2857 static int showMemProfProfile(const std::string &Filename,
2858                               const std::string &ProfiledBinary,
2859                               ShowFormat SFormat, raw_fd_ostream &OS) {
2860   if (SFormat == ShowFormat::Json)
2861     exitWithError("JSON output is not supported for MemProf");
2862   auto ReaderOr = llvm::memprof::RawMemProfReader::create(
2863       Filename, ProfiledBinary, /*KeepNames=*/true);
2864   if (Error E = ReaderOr.takeError())
2865     // Since the error can be related to the profile or the binary we do not
2866     // pass whence. Instead additional context is provided where necessary in
2867     // the error message.
2868     exitWithError(std::move(E), /*Whence*/ "");
2869 
2870   std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
2871       ReaderOr.get().release());
2872 
2873   Reader->printYAML(OS);
2874   return 0;
2875 }
2876 
2877 static int showDebugInfoCorrelation(const std::string &Filename,
2878                                     bool ShowDetailedSummary,
2879                                     bool ShowProfileSymbolList,
2880                                     ShowFormat SFormat, raw_fd_ostream &OS) {
2881   if (SFormat == ShowFormat::Json)
2882     exitWithError("JSON output is not supported for debug info correlation");
2883   std::unique_ptr<InstrProfCorrelator> Correlator;
2884   if (auto Err = InstrProfCorrelator::get(Filename).moveInto(Correlator))
2885     exitWithError(std::move(Err), Filename);
2886   if (SFormat == ShowFormat::Yaml) {
2887     if (auto Err = Correlator->dumpYaml(OS))
2888       exitWithError(std::move(Err), Filename);
2889     return 0;
2890   }
2891 
2892   if (auto Err = Correlator->correlateProfileData())
2893     exitWithError(std::move(Err), Filename);
2894 
2895   InstrProfSymtab Symtab;
2896   if (auto Err = Symtab.create(
2897           StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
2898     exitWithError(std::move(Err), Filename);
2899 
2900   if (ShowProfileSymbolList)
2901     Symtab.dumpNames(OS);
2902   // TODO: Read "Profile Data Type" from debug info to compute and show how many
2903   // counters the section holds.
2904   if (ShowDetailedSummary)
2905     OS << "Counters section size: 0x"
2906        << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n";
2907   OS << "Found " << Correlator->getDataSize() << " functions\n";
2908 
2909   return 0;
2910 }
2911 
2912 static int show_main(int argc, const char *argv[]) {
2913   cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"));
2914 
2915   cl::opt<bool> ShowCounts("counts", cl::init(false),
2916                            cl::desc("Show counter values for shown functions"));
2917   cl::opt<ShowFormat> SFormat(
2918       "show-format", cl::init(ShowFormat::Text),
2919       cl::desc("Emit output in the selected format if supported"),
2920       cl::values(clEnumValN(ShowFormat::Text, "text",
2921                             "emit normal text output (default)"),
2922                  clEnumValN(ShowFormat::Json, "json", "emit JSON"),
2923                  clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
2924   // TODO: Consider replacing this with `--show-format=text-encoding`.
2925   cl::opt<bool> TextFormat(
2926       "text", cl::init(false),
2927       cl::desc("Show instr profile data in text dump format"));
2928   cl::opt<bool> JsonFormat(
2929       "json", cl::desc("Show sample profile data in the JSON format "
2930                        "(deprecated, please use --show-format=json)"));
2931   cl::opt<bool> ShowIndirectCallTargets(
2932       "ic-targets", cl::init(false),
2933       cl::desc("Show indirect call site target values for shown functions"));
2934   cl::opt<bool> ShowMemOPSizes(
2935       "memop-sizes", cl::init(false),
2936       cl::desc("Show the profiled sizes of the memory intrinsic calls "
2937                "for shown functions"));
2938   cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
2939                                     cl::desc("Show detailed profile summary"));
2940   cl::list<uint32_t> DetailedSummaryCutoffs(
2941       cl::CommaSeparated, "detailed-summary-cutoffs",
2942       cl::desc(
2943           "Cutoff percentages (times 10000) for generating detailed summary"),
2944       cl::value_desc("800000,901000,999999"));
2945   cl::opt<bool> ShowHotFuncList(
2946       "hot-func-list", cl::init(false),
2947       cl::desc("Show profile summary of a list of hot functions"));
2948   cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
2949                                  cl::desc("Details for every function"));
2950   cl::opt<bool> ShowCS("showcs", cl::init(false),
2951                        cl::desc("Show context sensitive counts"));
2952   cl::opt<std::string> ShowFunction("function",
2953                                     cl::desc("Details for matching functions"));
2954 
2955   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
2956                                       cl::init("-"), cl::desc("Output file"));
2957   cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
2958                             cl::aliasopt(OutputFilename));
2959   cl::opt<ProfileKinds> ProfileKind(
2960       cl::desc("Profile kind:"), cl::init(instr),
2961       cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
2962                  clEnumVal(sample, "Sample profile"),
2963                  clEnumVal(memory, "MemProf memory access profile")));
2964   cl::opt<uint32_t> TopNFunctions(
2965       "topn", cl::init(0),
2966       cl::desc("Show the list of functions with the largest internal counts"));
2967   cl::opt<uint32_t> ValueCutoff(
2968       "value-cutoff", cl::init(0),
2969       cl::desc("Set the count value cutoff. Functions with the maximum count "
2970                "less than this value will not be printed out. (Default is 0)"));
2971   cl::opt<bool> OnlyListBelow(
2972       "list-below-cutoff", cl::init(false),
2973       cl::desc("Only output names of functions whose max count values are "
2974                "below the cutoff value"));
2975   cl::opt<bool> ShowProfileSymbolList(
2976       "show-prof-sym-list", cl::init(false),
2977       cl::desc("Show profile symbol list if it exists in the profile. "));
2978   cl::opt<bool> ShowSectionInfoOnly(
2979       "show-sec-info-only", cl::init(false),
2980       cl::desc("Show the information of each section in the sample profile. "
2981                "The flag is only usable when the sample profile is in "
2982                "extbinary format"));
2983   cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false),
2984                               cl::desc("Show binary ids in the profile. "));
2985   cl::opt<bool> ShowTemporalProfTraces(
2986       "temporal-profile-traces",
2987       cl::desc("Show temporal profile traces in the profile."));
2988   cl::opt<std::string> DebugInfoFilename(
2989       "debug-info", cl::init(""),
2990       cl::desc("Read and extract profile metadata from debug info and show "
2991                "the functions it found."));
2992   cl::opt<bool> ShowCovered(
2993       "covered", cl::init(false),
2994       cl::desc("Show only the functions that have been executed."));
2995   cl::opt<std::string> ProfiledBinary(
2996       "profiled-binary", cl::init(""),
2997       cl::desc("Path to binary from which the profile was collected."));
2998   cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false),
2999                                    cl::desc("Show profile version. "));
3000   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
3001 
3002   if (Filename.empty() && DebugInfoFilename.empty())
3003     exitWithError(
3004         "the positional argument '<profdata-file>' is required unless '--" +
3005         DebugInfoFilename.ArgStr + "' is provided");
3006 
3007   if (Filename == OutputFilename) {
3008     errs() << sys::path::filename(argv[0])
3009            << ": Input file name cannot be the same as the output file name!\n";
3010     return 1;
3011   }
3012   if (JsonFormat)
3013     SFormat = ShowFormat::Json;
3014 
3015   std::error_code EC;
3016   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3017   if (EC)
3018     exitWithErrorCode(EC, OutputFilename);
3019 
3020   if (ShowAllFunctions && !ShowFunction.empty())
3021     WithColor::warning() << "-function argument ignored: showing all functions\n";
3022 
3023   if (!DebugInfoFilename.empty())
3024     return showDebugInfoCorrelation(DebugInfoFilename, ShowDetailedSummary,
3025                                     ShowProfileSymbolList, SFormat, OS);
3026 
3027   if (ProfileKind == instr)
3028     return showInstrProfile(
3029         Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets,
3030         ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
3031         ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
3032         TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion,
3033         ShowTemporalProfTraces, SFormat, OS);
3034   if (ProfileKind == sample)
3035     return showSampleProfile(Filename, ShowCounts, TopNFunctions,
3036                              ShowAllFunctions, ShowDetailedSummary,
3037                              ShowFunction, ShowProfileSymbolList,
3038                              ShowSectionInfoOnly, ShowHotFuncList, SFormat, OS);
3039   return showMemProfProfile(Filename, ProfiledBinary, SFormat, OS);
3040 }
3041 
3042 static int order_main(int argc, const char *argv[]) {
3043   cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"));
3044   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
3045                                       cl::init("-"), cl::desc("Output file"));
3046   cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
3047                             cl::aliasopt(OutputFilename));
3048   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data order\n");
3049 
3050   std::error_code EC;
3051   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3052   if (EC)
3053     exitWithErrorCode(EC, OutputFilename);
3054   auto FS = vfs::getRealFileSystem();
3055   auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
3056   if (Error E = ReaderOrErr.takeError())
3057     exitWithError(std::move(E), Filename);
3058 
3059   auto Reader = std::move(ReaderOrErr.get());
3060   for (auto &I : *Reader) {
3061     // Read all entries
3062     (void)I;
3063   }
3064   auto &Traces = Reader->getTemporalProfTraces();
3065   auto Nodes = TemporalProfTraceTy::createBPFunctionNodes(Traces);
3066   BalancedPartitioningConfig Config;
3067   BalancedPartitioning BP(Config);
3068   BP.run(Nodes);
3069 
3070   WithColor::note() << "# Ordered " << Nodes.size() << " functions\n";
3071   for (auto &N : Nodes) {
3072     auto FuncName = Reader->getSymtab().getFuncName(N.Id);
3073     if (FuncName.contains(':')) {
3074       // GlobalValue::getGlobalIdentifier() prefixes the filename if the symbol
3075       // is local. This logic will break if there is a colon in the filename,
3076       // but we cannot use rsplit() because ObjC symbols can have colons.
3077       auto [Filename, ParsedFuncName] = FuncName.split(':');
3078       // Emit a comment describing where this symbol came from
3079       OS << "# " << Filename << "\n";
3080       FuncName = ParsedFuncName;
3081     }
3082     OS << FuncName << "\n";
3083   }
3084   return 0;
3085 }
3086 
3087 typedef int (*llvm_profdata_subcommand)(int, const char *[]);
3088 
3089 static std::tuple<StringRef, llvm_profdata_subcommand>
3090     llvm_profdata_subcommands[] = {
3091         {"merge", merge_main},
3092         {"show", show_main},
3093         {"order", order_main},
3094         {"overlap", overlap_main},
3095 };
3096 
3097 int llvm_profdata_main(int argc, char **argvNonConst,
3098                        const llvm::ToolContext &) {
3099   const char **argv = const_cast<const char **>(argvNonConst);
3100   InitLLVM X(argc, argv);
3101 
3102   StringRef ProgName(sys::path::filename(argv[0]));
3103   if (argc > 1) {
3104 
3105     llvm_profdata_subcommand func = nullptr;
3106     for (auto [subcmd_name, subcmd_action] : llvm_profdata_subcommands)
3107       if (subcmd_name == argv[1])
3108         func = subcmd_action;
3109 
3110     if (func) {
3111       std::string Invocation(ProgName.str() + " " + argv[1]);
3112       argv[1] = Invocation.c_str();
3113       return func(argc - 1, argv + 1);
3114     }
3115 
3116     if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 ||
3117         strcmp(argv[1], "--help") == 0) {
3118 
3119       errs() << "OVERVIEW: LLVM profile data tools\n\n"
3120              << "USAGE: " << ProgName << " <command> [args...]\n"
3121              << "USAGE: " << ProgName << " <command> -help\n\n"
3122              << "See each individual command --help for more details.\n"
3123              << "Available commands: "
3124              << join(map_range(llvm_profdata_subcommands,
3125                                [](auto const &KV) { return std::get<0>(KV); }),
3126                      ", ")
3127              << "\n";
3128       return 0;
3129     }
3130 
3131     if (strcmp(argv[1], "--version") == 0) {
3132       outs() << ProgName << '\n';
3133       cl::PrintVersionMessage();
3134       return 0;
3135     }
3136   }
3137 
3138   if (argc < 2)
3139     errs() << ProgName << ": No command specified!\n";
3140   else
3141     errs() << ProgName << ": Unknown command!\n";
3142 
3143   errs() << "USAGE: " << ProgName << " <"
3144          << join(map_range(llvm_profdata_subcommands,
3145                            [](auto const &KV) { return std::get<0>(KV); }),
3146                  "|")
3147          << "> [args...]\n";
3148   return 1;
3149 }
3150