1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/ProfileSummary.h"
28 #include "llvm/ProfileData/ProfileCommon.h"
29 #include "llvm/ProfileData/SampleProf.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Compression.h"
32 #include "llvm/Support/ErrorOr.h"
33 #include "llvm/Support/JSON.h"
34 #include "llvm/Support/LEB128.h"
35 #include "llvm/Support/LineIterator.h"
36 #include "llvm/Support/MD5.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/VirtualFileSystem.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <algorithm>
41 #include <cstddef>
42 #include <cstdint>
43 #include <limits>
44 #include <memory>
45 #include <system_error>
46 #include <vector>
47 
48 using namespace llvm;
49 using namespace sampleprof;
50 
51 #define DEBUG_TYPE "samplepgo-reader"
52 
// This internal option specifies whether the profile uses flow-sensitive (FS)
// discriminators. It only applies to text and binary format profiles; for
// ext-binary format profiles the flag is taken from the summary section.
static cl::opt<bool> ProfileIsFSDisciminator(
    "profile-isfs", cl::Hidden, cl::init(false),
    cl::desc("Profile uses flow sensitive discriminators"));
59 
60 /// Dump the function profile for \p FName.
61 ///
62 /// \param FContext Name + context of the function to print.
63 /// \param OS Stream to emit the output to.
dumpFunctionProfile(const FunctionSamples & FS,raw_ostream & OS)64 void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
65                                               raw_ostream &OS) {
66   OS << "Function: " << FS.getContext().toString() << ": " << FS;
67 }
68 
69 /// Dump all the function profiles found on stream \p OS.
dump(raw_ostream & OS)70 void SampleProfileReader::dump(raw_ostream &OS) {
71   std::vector<NameFunctionSamples> V;
72   sortFuncProfiles(Profiles, V);
73   for (const auto &I : V)
74     dumpFunctionProfile(*I.second, OS);
75 }
76 
dumpFunctionProfileJson(const FunctionSamples & S,json::OStream & JOS,bool TopLevel=false)77 static void dumpFunctionProfileJson(const FunctionSamples &S,
78                                     json::OStream &JOS, bool TopLevel = false) {
79   auto DumpBody = [&](const BodySampleMap &BodySamples) {
80     for (const auto &I : BodySamples) {
81       const LineLocation &Loc = I.first;
82       const SampleRecord &Sample = I.second;
83       JOS.object([&] {
84         JOS.attribute("line", Loc.LineOffset);
85         if (Loc.Discriminator)
86           JOS.attribute("discriminator", Loc.Discriminator);
87         JOS.attribute("samples", Sample.getSamples());
88 
89         auto CallTargets = Sample.getSortedCallTargets();
90         if (!CallTargets.empty()) {
91           JOS.attributeArray("calls", [&] {
92             for (const auto &J : CallTargets) {
93               JOS.object([&] {
94                 JOS.attribute("function", J.first.str());
95                 JOS.attribute("samples", J.second);
96               });
97             }
98           });
99         }
100       });
101     }
102   };
103 
104   auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) {
105     for (const auto &I : CallsiteSamples)
106       for (const auto &FS : I.second) {
107         const LineLocation &Loc = I.first;
108         const FunctionSamples &CalleeSamples = FS.second;
109         JOS.object([&] {
110           JOS.attribute("line", Loc.LineOffset);
111           if (Loc.Discriminator)
112             JOS.attribute("discriminator", Loc.Discriminator);
113           JOS.attributeArray(
114               "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); });
115         });
116       }
117   };
118 
119   JOS.object([&] {
120     JOS.attribute("name", S.getFunction().str());
121     JOS.attribute("total", S.getTotalSamples());
122     if (TopLevel)
123       JOS.attribute("head", S.getHeadSamples());
124 
125     const auto &BodySamples = S.getBodySamples();
126     if (!BodySamples.empty())
127       JOS.attributeArray("body", [&] { DumpBody(BodySamples); });
128 
129     const auto &CallsiteSamples = S.getCallsiteSamples();
130     if (!CallsiteSamples.empty())
131       JOS.attributeArray("callsites",
132                          [&] { DumpCallsiteSamples(CallsiteSamples); });
133   });
134 }
135 
136 /// Dump all the function profiles found on stream \p OS in the JSON format.
dumpJson(raw_ostream & OS)137 void SampleProfileReader::dumpJson(raw_ostream &OS) {
138   std::vector<NameFunctionSamples> V;
139   sortFuncProfiles(Profiles, V);
140   json::OStream JOS(OS, 2);
141   JOS.arrayBegin();
142   for (const auto &F : V)
143     dumpFunctionProfileJson(*F.second, JOS, true);
144   JOS.arrayEnd();
145 
146   // Emit a newline character at the end as json::OStream doesn't emit one.
147   OS << "\n";
148 }
149 
150 /// Parse \p Input as function head.
151 ///
152 /// Parse one line of \p Input, and update function name in \p FName,
153 /// function's total sample count in \p NumSamples, function's entry
154 /// count in \p NumHeadSamples.
155 ///
156 /// \returns true if parsing is successful.
ParseHead(const StringRef & Input,StringRef & FName,uint64_t & NumSamples,uint64_t & NumHeadSamples)157 static bool ParseHead(const StringRef &Input, StringRef &FName,
158                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
159   if (Input[0] == ' ')
160     return false;
161   size_t n2 = Input.rfind(':');
162   size_t n1 = Input.rfind(':', n2 - 1);
163   FName = Input.substr(0, n1);
164   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
165     return false;
166   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
167     return false;
168   return true;
169 }
170 
/// Returns true if line offset \p L is legal (only has 16 bits).
///
/// The parameter is 64 bits wide so that offsets decoded as uint64_t are
/// checked in full instead of being truncated to 32 bits before the test.
static bool isOffsetLegal(uint64_t L) { return (L & 0xffff) == L; }
173 
174 /// Parse \p Input that contains metadata.
175 /// Possible metadata:
176 /// - CFG Checksum information:
177 ///     !CFGChecksum: 12345
178 /// - CFG Checksum information:
179 ///     !Attributes: 1
180 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
parseMetadata(const StringRef & Input,uint64_t & FunctionHash,uint32_t & Attributes)181 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
182                           uint32_t &Attributes) {
183   if (Input.starts_with("!CFGChecksum:")) {
184     StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
185     return !CFGInfo.getAsInteger(10, FunctionHash);
186   }
187 
188   if (Input.starts_with("!Attributes:")) {
189     StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
190     return !Attrib.getAsInteger(10, Attributes);
191   }
192 
193   return false;
194 }
195 
/// Classification of an indented (non-header) text profile line, as assigned
/// by ParseLine.
enum class LineType {
  CallSiteProfile, // "offset[.disc]: callee:samples" (inlined callsite).
  BodyProfile,     // "offset[.disc]: samples [target:count]*" (body sample).
  Metadata,        // Line whose first non-space character is '!'.
};
201 
202 /// Parse \p Input as line sample.
203 ///
204 /// \param Input input line.
205 /// \param LineTy Type of this line.
206 /// \param Depth the depth of the inline stack.
207 /// \param NumSamples total samples of the line/inlined callsite.
208 /// \param LineOffset line offset to the start of the function.
209 /// \param Discriminator discriminator of the line.
210 /// \param TargetCountMap map from indirect call target to count.
211 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
212 ///
213 /// returns true if parsing is successful.
ParseLine(const StringRef & Input,LineType & LineTy,uint32_t & Depth,uint64_t & NumSamples,uint32_t & LineOffset,uint32_t & Discriminator,StringRef & CalleeName,DenseMap<StringRef,uint64_t> & TargetCountMap,uint64_t & FunctionHash,uint32_t & Attributes)214 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
215                       uint64_t &NumSamples, uint32_t &LineOffset,
216                       uint32_t &Discriminator, StringRef &CalleeName,
217                       DenseMap<StringRef, uint64_t> &TargetCountMap,
218                       uint64_t &FunctionHash, uint32_t &Attributes) {
219   for (Depth = 0; Input[Depth] == ' '; Depth++)
220     ;
221   if (Depth == 0)
222     return false;
223 
224   if (Input[Depth] == '!') {
225     LineTy = LineType::Metadata;
226     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
227   }
228 
229   size_t n1 = Input.find(':');
230   StringRef Loc = Input.substr(Depth, n1 - Depth);
231   size_t n2 = Loc.find('.');
232   if (n2 == StringRef::npos) {
233     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
234       return false;
235     Discriminator = 0;
236   } else {
237     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
238       return false;
239     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
240       return false;
241   }
242 
243   StringRef Rest = Input.substr(n1 + 2);
244   if (isDigit(Rest[0])) {
245     LineTy = LineType::BodyProfile;
246     size_t n3 = Rest.find(' ');
247     if (n3 == StringRef::npos) {
248       if (Rest.getAsInteger(10, NumSamples))
249         return false;
250     } else {
251       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
252         return false;
253     }
254     // Find call targets and their sample counts.
255     // Note: In some cases, there are symbols in the profile which are not
256     // mangled. To accommodate such cases, use colon + integer pairs as the
257     // anchor points.
258     // An example:
259     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
260     // ":1000" and ":437" are used as anchor points so the string above will
261     // be interpreted as
262     // target: _M_construct<char *>
263     // count: 1000
264     // target: string_view<std::allocator<char> >
265     // count: 437
266     while (n3 != StringRef::npos) {
267       n3 += Rest.substr(n3).find_first_not_of(' ');
268       Rest = Rest.substr(n3);
269       n3 = Rest.find_first_of(':');
270       if (n3 == StringRef::npos || n3 == 0)
271         return false;
272 
273       StringRef Target;
274       uint64_t count, n4;
275       while (true) {
276         // Get the segment after the current colon.
277         StringRef AfterColon = Rest.substr(n3 + 1);
278         // Get the target symbol before the current colon.
279         Target = Rest.substr(0, n3);
280         // Check if the word after the current colon is an integer.
281         n4 = AfterColon.find_first_of(' ');
282         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
283         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
284         if (!WordAfterColon.getAsInteger(10, count))
285           break;
286 
287         // Try to find the next colon.
288         uint64_t n5 = AfterColon.find_first_of(':');
289         if (n5 == StringRef::npos)
290           return false;
291         n3 += n5 + 1;
292       }
293 
294       // An anchor point is found. Save the {target, count} pair
295       TargetCountMap[Target] = count;
296       if (n4 == Rest.size())
297         break;
298       // Change n3 to the next blank space after colon + integer pair.
299       n3 = n4;
300     }
301   } else {
302     LineTy = LineType::CallSiteProfile;
303     size_t n3 = Rest.find_last_of(':');
304     CalleeName = Rest.substr(0, n3);
305     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
306       return false;
307   }
308   return true;
309 }
310 
311 /// Load samples from a text file.
312 ///
313 /// See the documentation at the top of the file for an explanation of
314 /// the expected format.
315 ///
316 /// \returns true if the file was loaded successfully, false otherwise.
readImpl()317 std::error_code SampleProfileReaderText::readImpl() {
318   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
319   sampleprof_error Result = sampleprof_error::success;
320 
321   InlineCallStack InlineStack;
322   uint32_t TopLevelProbeProfileCount = 0;
323 
324   // DepthMetadata tracks whether we have processed metadata for the current
325   // top-level or nested function profile.
326   uint32_t DepthMetadata = 0;
327 
328   ProfileIsFS = ProfileIsFSDisciminator;
329   FunctionSamples::ProfileIsFS = ProfileIsFS;
330   for (; !LineIt.is_at_eof(); ++LineIt) {
331     size_t pos = LineIt->find_first_not_of(' ');
332     if (pos == LineIt->npos || (*LineIt)[pos] == '#')
333       continue;
334     // Read the header of each function.
335     //
336     // Note that for function identifiers we are actually expecting
337     // mangled names, but we may not always get them. This happens when
338     // the compiler decides not to emit the function (e.g., it was inlined
339     // and removed). In this case, the binary will not have the linkage
340     // name for the function, so the profiler will emit the function's
341     // unmangled name, which may contain characters like ':' and '>' in its
342     // name (member functions, templates, etc).
343     //
344     // The only requirement we place on the identifier, then, is that it
345     // should not begin with a number.
346     if ((*LineIt)[0] != ' ') {
347       uint64_t NumSamples, NumHeadSamples;
348       StringRef FName;
349       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
350         reportError(LineIt.line_number(),
351                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
352         return sampleprof_error::malformed;
353       }
354       DepthMetadata = 0;
355       SampleContext FContext(FName, CSNameTable);
356       if (FContext.hasContext())
357         ++CSProfileCount;
358       FunctionSamples &FProfile = Profiles.Create(FContext);
359       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
360       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
361       InlineStack.clear();
362       InlineStack.push_back(&FProfile);
363     } else {
364       uint64_t NumSamples;
365       StringRef FName;
366       DenseMap<StringRef, uint64_t> TargetCountMap;
367       uint32_t Depth, LineOffset, Discriminator;
368       LineType LineTy;
369       uint64_t FunctionHash = 0;
370       uint32_t Attributes = 0;
371       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
372                      Discriminator, FName, TargetCountMap, FunctionHash,
373                      Attributes)) {
374         reportError(LineIt.line_number(),
375                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
376                         *LineIt);
377         return sampleprof_error::malformed;
378       }
379       if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
380         // Metadata must be put at the end of a function profile.
381         reportError(LineIt.line_number(),
382                     "Found non-metadata after metadata: " + *LineIt);
383         return sampleprof_error::malformed;
384       }
385 
386       // Here we handle FS discriminators.
387       Discriminator &= getDiscriminatorMask();
388 
389       while (InlineStack.size() > Depth) {
390         InlineStack.pop_back();
391       }
392       switch (LineTy) {
393       case LineType::CallSiteProfile: {
394         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
395             LineLocation(LineOffset, Discriminator))[FunctionId(FName)];
396         FSamples.setFunction(FunctionId(FName));
397         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
398         InlineStack.push_back(&FSamples);
399         DepthMetadata = 0;
400         break;
401       }
402       case LineType::BodyProfile: {
403         while (InlineStack.size() > Depth) {
404           InlineStack.pop_back();
405         }
406         FunctionSamples &FProfile = *InlineStack.back();
407         for (const auto &name_count : TargetCountMap) {
408           MergeResult(Result, FProfile.addCalledTargetSamples(
409                                   LineOffset, Discriminator,
410                                   FunctionId(name_count.first),
411                                   name_count.second));
412         }
413         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
414                                                     NumSamples));
415         break;
416       }
417       case LineType::Metadata: {
418         FunctionSamples &FProfile = *InlineStack.back();
419         if (FunctionHash) {
420           FProfile.setFunctionHash(FunctionHash);
421           if (Depth == 1)
422             ++TopLevelProbeProfileCount;
423         }
424         FProfile.getContext().setAllAttributes(Attributes);
425         if (Attributes & (uint32_t)ContextShouldBeInlined)
426           ProfileIsPreInlined = true;
427         DepthMetadata = Depth;
428         break;
429       }
430       }
431     }
432   }
433 
434   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
435          "Cannot have both context-sensitive and regular profile");
436   ProfileIsCS = (CSProfileCount > 0);
437   assert((TopLevelProbeProfileCount == 0 ||
438           TopLevelProbeProfileCount == Profiles.size()) &&
439          "Cannot have both probe-based profiles and regular profiles");
440   ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
441   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
442   FunctionSamples::ProfileIsCS = ProfileIsCS;
443   FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
444 
445   if (Result == sampleprof_error::success)
446     computeSummary();
447 
448   return Result;
449 }
450 
hasFormat(const MemoryBuffer & Buffer)451 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
452   bool result = false;
453 
454   // Check that the first non-comment line is a valid function header.
455   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
456   if (!LineIt.is_at_eof()) {
457     if ((*LineIt)[0] != ' ') {
458       uint64_t NumSamples, NumHeadSamples;
459       StringRef FName;
460       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
461     }
462   }
463 
464   return result;
465 }
466 
readNumber()467 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
468   unsigned NumBytesRead = 0;
469   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
470 
471   if (Val > std::numeric_limits<T>::max()) {
472     std::error_code EC = sampleprof_error::malformed;
473     reportError(0, EC.message());
474     return EC;
475   } else if (Data + NumBytesRead > End) {
476     std::error_code EC = sampleprof_error::truncated;
477     reportError(0, EC.message());
478     return EC;
479   }
480 
481   Data += NumBytesRead;
482   return static_cast<T>(Val);
483 }
484 
readString()485 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
486   StringRef Str(reinterpret_cast<const char *>(Data));
487   if (Data + Str.size() + 1 > End) {
488     std::error_code EC = sampleprof_error::truncated;
489     reportError(0, EC.message());
490     return EC;
491   }
492 
493   Data += Str.size() + 1;
494   return Str;
495 }
496 
497 template <typename T>
readUnencodedNumber()498 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
499   if (Data + sizeof(T) > End) {
500     std::error_code EC = sampleprof_error::truncated;
501     reportError(0, EC.message());
502     return EC;
503   }
504 
505   using namespace support;
506   T Val = endian::readNext<T, llvm::endianness::little, unaligned>(Data);
507   return Val;
508 }
509 
510 template <typename T>
readStringIndex(T & Table)511 inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
512   auto Idx = readNumber<size_t>();
513   if (std::error_code EC = Idx.getError())
514     return EC;
515   if (*Idx >= Table.size())
516     return sampleprof_error::truncated_name_table;
517   return *Idx;
518 }
519 
520 ErrorOr<FunctionId>
readStringFromTable(size_t * RetIdx)521 SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
522   auto Idx = readStringIndex(NameTable);
523   if (std::error_code EC = Idx.getError())
524     return EC;
525   if (RetIdx)
526     *RetIdx = *Idx;
527   return NameTable[*Idx];
528 }
529 
530 ErrorOr<SampleContextFrames>
readContextFromTable(size_t * RetIdx)531 SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
532   auto ContextIdx = readNumber<size_t>();
533   if (std::error_code EC = ContextIdx.getError())
534     return EC;
535   if (*ContextIdx >= CSNameTable.size())
536     return sampleprof_error::truncated_name_table;
537   if (RetIdx)
538     *RetIdx = *ContextIdx;
539   return CSNameTable[*ContextIdx];
540 }
541 
542 ErrorOr<std::pair<SampleContext, uint64_t>>
readSampleContextFromTable()543 SampleProfileReaderBinary::readSampleContextFromTable() {
544   SampleContext Context;
545   size_t Idx;
546   if (ProfileIsCS) {
547     auto FContext(readContextFromTable(&Idx));
548     if (std::error_code EC = FContext.getError())
549       return EC;
550     Context = SampleContext(*FContext);
551   } else {
552     auto FName(readStringFromTable(&Idx));
553     if (std::error_code EC = FName.getError())
554       return EC;
555     Context = SampleContext(*FName);
556   }
557   // Since MD5SampleContextStart may point to the profile's file data, need to
558   // make sure it is reading the same value on big endian CPU.
559   uint64_t Hash = support::endian::read64le(MD5SampleContextStart + Idx);
560   // Lazy computing of hash value, write back to the table to cache it. Only
561   // compute the context's hash value if it is being referenced for the first
562   // time.
563   if (Hash == 0) {
564     assert(MD5SampleContextStart == MD5SampleContextTable.data());
565     Hash = Context.getHashCode();
566     support::endian::write64le(&MD5SampleContextTable[Idx], Hash);
567   }
568   return std::make_pair(Context, Hash);
569 }
570 
571 std::error_code
readProfile(FunctionSamples & FProfile)572 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
573   auto NumSamples = readNumber<uint64_t>();
574   if (std::error_code EC = NumSamples.getError())
575     return EC;
576   FProfile.addTotalSamples(*NumSamples);
577 
578   // Read the samples in the body.
579   auto NumRecords = readNumber<uint32_t>();
580   if (std::error_code EC = NumRecords.getError())
581     return EC;
582 
583   for (uint32_t I = 0; I < *NumRecords; ++I) {
584     auto LineOffset = readNumber<uint64_t>();
585     if (std::error_code EC = LineOffset.getError())
586       return EC;
587 
588     if (!isOffsetLegal(*LineOffset)) {
589       return std::error_code();
590     }
591 
592     auto Discriminator = readNumber<uint64_t>();
593     if (std::error_code EC = Discriminator.getError())
594       return EC;
595 
596     auto NumSamples = readNumber<uint64_t>();
597     if (std::error_code EC = NumSamples.getError())
598       return EC;
599 
600     auto NumCalls = readNumber<uint32_t>();
601     if (std::error_code EC = NumCalls.getError())
602       return EC;
603 
604     // Here we handle FS discriminators:
605     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
606 
607     for (uint32_t J = 0; J < *NumCalls; ++J) {
608       auto CalledFunction(readStringFromTable());
609       if (std::error_code EC = CalledFunction.getError())
610         return EC;
611 
612       auto CalledFunctionSamples = readNumber<uint64_t>();
613       if (std::error_code EC = CalledFunctionSamples.getError())
614         return EC;
615 
616       FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
617                                       *CalledFunction, *CalledFunctionSamples);
618     }
619 
620     FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
621   }
622 
623   // Read all the samples for inlined function calls.
624   auto NumCallsites = readNumber<uint32_t>();
625   if (std::error_code EC = NumCallsites.getError())
626     return EC;
627 
628   for (uint32_t J = 0; J < *NumCallsites; ++J) {
629     auto LineOffset = readNumber<uint64_t>();
630     if (std::error_code EC = LineOffset.getError())
631       return EC;
632 
633     auto Discriminator = readNumber<uint64_t>();
634     if (std::error_code EC = Discriminator.getError())
635       return EC;
636 
637     auto FName(readStringFromTable());
638     if (std::error_code EC = FName.getError())
639       return EC;
640 
641     // Here we handle FS discriminators:
642     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
643 
644     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
645         LineLocation(*LineOffset, DiscriminatorVal))[*FName];
646     CalleeProfile.setFunction(*FName);
647     if (std::error_code EC = readProfile(CalleeProfile))
648       return EC;
649   }
650 
651   return sampleprof_error::success;
652 }
653 
654 std::error_code
readFuncProfile(const uint8_t * Start)655 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
656   Data = Start;
657   auto NumHeadSamples = readNumber<uint64_t>();
658   if (std::error_code EC = NumHeadSamples.getError())
659     return EC;
660 
661   auto FContextHash(readSampleContextFromTable());
662   if (std::error_code EC = FContextHash.getError())
663     return EC;
664 
665   auto &[FContext, Hash] = *FContextHash;
666   // Use the cached hash value for insertion instead of recalculating it.
667   auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples());
668   FunctionSamples &FProfile = Res.first->second;
669   FProfile.setContext(FContext);
670   FProfile.addHeadSamples(*NumHeadSamples);
671 
672   if (FContext.hasContext())
673     CSProfileCount++;
674 
675   if (std::error_code EC = readProfile(FProfile))
676     return EC;
677   return sampleprof_error::success;
678 }
679 
readImpl()680 std::error_code SampleProfileReaderBinary::readImpl() {
681   ProfileIsFS = ProfileIsFSDisciminator;
682   FunctionSamples::ProfileIsFS = ProfileIsFS;
683   while (Data < End) {
684     if (std::error_code EC = readFuncProfile(Data))
685       return EC;
686   }
687 
688   return sampleprof_error::success;
689 }
690 
readOneSection(const uint8_t * Start,uint64_t Size,const SecHdrTableEntry & Entry)691 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
692     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
693   Data = Start;
694   End = Start + Size;
695   switch (Entry.Type) {
696   case SecProfSummary:
697     if (std::error_code EC = readSummary())
698       return EC;
699     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
700       Summary->setPartialProfile(true);
701     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
702       FunctionSamples::ProfileIsCS = ProfileIsCS = true;
703     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
704       FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
705     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
706       FunctionSamples::ProfileIsFS = ProfileIsFS = true;
707     break;
708   case SecNameTable: {
709     bool FixedLengthMD5 =
710         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
711     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
712     // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire
713     // profile uses MD5 for function name matching in IPO passes.
714     ProfileIsMD5 = ProfileIsMD5 || UseMD5;
715     FunctionSamples::HasUniqSuffix =
716         hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
717     if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5))
718       return EC;
719     break;
720   }
721   case SecCSNameTable: {
722     if (std::error_code EC = readCSNameTableSec())
723       return EC;
724     break;
725   }
726   case SecLBRProfile:
727     if (std::error_code EC = readFuncProfiles())
728       return EC;
729     break;
730   case SecFuncOffsetTable:
731     // If module is absent, we are using LLVM tools, and need to read all
732     // profiles, so skip reading the function offset table.
733     if (!M) {
734       Data = End;
735     } else {
736       assert((!ProfileIsCS ||
737               hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) &&
738              "func offset table should always be sorted in CS profile");
739       if (std::error_code EC = readFuncOffsetTable())
740         return EC;
741     }
742     break;
743   case SecFuncMetadata: {
744     ProfileIsProbeBased =
745         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
746     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
747     bool HasAttribute =
748         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
749     if (std::error_code EC = readFuncMetadata(HasAttribute))
750       return EC;
751     break;
752   }
753   case SecProfileSymbolList:
754     if (std::error_code EC = readProfileSymbolList())
755       return EC;
756     break;
757   default:
758     if (std::error_code EC = readCustomSection(Entry))
759       return EC;
760     break;
761   }
762   return sampleprof_error::success;
763 }
764 
useFuncOffsetList() const765 bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
766   // If profile is CS, the function offset section is expected to consist of
767   // sequences of contexts in pre-order layout
768   // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched
769   // context in the module is found, the profiles of all its callees are
770   // recursively loaded. A list is needed since the order of profiles matters.
771   if (ProfileIsCS)
772     return true;
773 
774   // If the profile is MD5, use the map container to lookup functions in
775   // the module. A remapper has no use on MD5 names.
776   if (useMD5())
777     return false;
778 
779   // Profile is not MD5 and if a remapper is present, the remapped name of
780   // every function needed to be matched against the module, so use the list
781   // container since each entry is accessed.
782   if (Remapper)
783     return true;
784 
785   // Otherwise use the map container for faster lookup.
786   // TODO: If the cardinality of the function offset section is much smaller
787   // than the number of functions in the module, using the list container can
788   // be always faster, but we need to figure out the constant factor to
789   // determine the cutoff.
790   return false;
791 }
792 
793 
collectFuncsFromModule()794 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
795   if (!M)
796     return false;
797   FuncsToUse.clear();
798   for (auto &F : *M)
799     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
800   return true;
801 }
802 
readFuncOffsetTable()803 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
804   // If there are more than one function offset section, the profile associated
805   // with the previous section has to be done reading before next one is read.
806   FuncOffsetTable.clear();
807   FuncOffsetList.clear();
808 
809   auto Size = readNumber<uint64_t>();
810   if (std::error_code EC = Size.getError())
811     return EC;
812 
813   bool UseFuncOffsetList = useFuncOffsetList();
814   if (UseFuncOffsetList)
815     FuncOffsetList.reserve(*Size);
816   else
817     FuncOffsetTable.reserve(*Size);
818 
819   for (uint64_t I = 0; I < *Size; ++I) {
820     auto FContextHash(readSampleContextFromTable());
821     if (std::error_code EC = FContextHash.getError())
822       return EC;
823 
824     auto &[FContext, Hash] = *FContextHash;
825     auto Offset = readNumber<uint64_t>();
826     if (std::error_code EC = Offset.getError())
827       return EC;
828 
829     if (UseFuncOffsetList)
830       FuncOffsetList.emplace_back(FContext, *Offset);
831     else
832       // Because Porfiles replace existing value with new value if collision
833       // happens, we also use the latest offset so that they are consistent.
834       FuncOffsetTable[Hash] = *Offset;
835  }
836 
837  return sampleprof_error::success;
838 }
839 
/// Read the function profiles of the current section.
///
/// Without a module every profile between Data and End is read sequentially.
/// With a module only profiles for functions collected from that module are
/// read, located through the function offset list or table; Data is then
/// moved to End so the whole section counts as consumed.
///
/// \returns the first error reported by readFuncProfile, or
/// sampleprof_error::success.
std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
  // Collect functions used by current module if the Reader has been
  // given a module.
  // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
  // which will query FunctionSamples::HasUniqSuffix, so it has to be
  // called after FunctionSamples::HasUniqSuffix is set, i.e. after
  // NameTable section is read.
  bool LoadFuncsToBeUsed = collectFuncsFromModule();

  // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
  // profiles.
  // Start is the base address that recorded offsets are added to below.
  const uint8_t *Start = Data;
  if (!LoadFuncsToBeUsed) {
    while (Data < End) {
      if (std::error_code EC = readFuncProfile(Data))
        return EC;
    }
    assert(Data == End && "More data is read than expected");
  } else {
    // Load function profiles on demand.
    // Register every module function name with the remapper first so the
    // Remapper->exist() queries below can find them.
    if (Remapper) {
      for (auto Name : FuncsToUse) {
        Remapper->insert(Name);
      }
    }

    if (ProfileIsCS) {
      assert(useFuncOffsetList());
      // With MD5 names the offset list holds hashes, so module function names
      // are converted to GUIDs once up front.
      DenseSet<uint64_t> FuncGuidsToUse;
      if (useMD5()) {
        for (auto Name : FuncsToUse)
          FuncGuidsToUse.insert(Function::getGUID(Name));
      }

      // For each function in current module, load all context profiles for
      // the function as well as their callee contexts which can help profile
      // guided importing for ThinLTO. This can be achieved by walking
      // through an ordered context container, where contexts are laid out
      // as if they were walked in preorder of a context trie. While
      // traversing the trie, a link to the highest common ancestor node is
      // kept so that all of its descendants will be loaded.
      const SampleContext *CommonContext = nullptr;
      for (const auto &NameOffset : FuncOffsetList) {
        const auto &FContext = NameOffset.first;
        FunctionId FName = FContext.getFunction();
        StringRef FNameString;
        if (!useMD5())
          FNameString = FName.stringRef();

        // For function in the current module, keep its farthest ancestor
        // context. This can be used to load itself and its child and
        // sibling contexts.
        if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
            (!useMD5() && (FuncsToUse.count(FNameString) ||
                           (Remapper && Remapper->exist(FNameString))))) {
          if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
            CommonContext = &FContext;
        }

        if (CommonContext == &FContext ||
            (CommonContext && CommonContext->IsPrefixOf(FContext))) {
          // Load profile for the current context which originated from
          // the common ancestor.
          const uint8_t *FuncProfileAddr = Start + NameOffset.second;
          if (std::error_code EC = readFuncProfile(FuncProfileAddr))
            return EC;
        }
      }
    } else if (useMD5()) {
      // Flat MD5 profile: look each module function up by hash in the map.
      assert(!useFuncOffsetList());
      for (auto Name : FuncsToUse) {
        auto GUID = MD5Hash(Name);
        auto iter = FuncOffsetTable.find(GUID);
        if (iter == FuncOffsetTable.end())
          continue;
        const uint8_t *FuncProfileAddr = Start + iter->second;
        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
          return EC;
      }
    } else if (Remapper) {
      // Flat string profile with a remapper: every list entry is visited and
      // kept if its name is in the module or known to the remapper.
      assert(useFuncOffsetList());
      for (auto NameOffset : FuncOffsetList) {
        SampleContext FContext(NameOffset.first);
        auto FuncName = FContext.getFunction();
        StringRef FuncNameStr = FuncName.stringRef();
        if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
          continue;
        const uint8_t *FuncProfileAddr = Start + NameOffset.second;
        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
          return EC;
      }
    } else {
      // Flat string profile without a remapper: the map is still keyed by the
      // MD5 hash of the name.
      assert(!useFuncOffsetList());
      for (auto Name : FuncsToUse) {
        auto iter = FuncOffsetTable.find(MD5Hash(Name));
        if (iter == FuncOffsetTable.end())
          continue;
        const uint8_t *FuncProfileAddr = Start + iter->second;
        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
          return EC;
      }
    }
    // Profiles were read out of order through offsets; mark the section as
    // fully consumed.
    Data = End;
  }
  assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
         "Cannot have both context-sensitive and regular profile");
  assert((!CSProfileCount || ProfileIsCS) &&
         "Section flag should be consistent with actual profile");
  return sampleprof_error::success;
}
950 
readProfileSymbolList()951 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
952   if (!ProfSymList)
953     ProfSymList = std::make_unique<ProfileSymbolList>();
954 
955   if (std::error_code EC = ProfSymList->read(Data, End - Data))
956     return EC;
957 
958   Data = End;
959   return sampleprof_error::success;
960 }
961 
decompressSection(const uint8_t * SecStart,const uint64_t SecSize,const uint8_t * & DecompressBuf,uint64_t & DecompressBufSize)962 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
963     const uint8_t *SecStart, const uint64_t SecSize,
964     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
965   Data = SecStart;
966   End = SecStart + SecSize;
967   auto DecompressSize = readNumber<uint64_t>();
968   if (std::error_code EC = DecompressSize.getError())
969     return EC;
970   DecompressBufSize = *DecompressSize;
971 
972   auto CompressSize = readNumber<uint64_t>();
973   if (std::error_code EC = CompressSize.getError())
974     return EC;
975 
976   if (!llvm::compression::zlib::isAvailable())
977     return sampleprof_error::zlib_unavailable;
978 
979   uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize);
980   size_t UCSize = DecompressBufSize;
981   llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize),
982                                                 Buffer, UCSize);
983   if (E)
984     return sampleprof_error::uncompress_failed;
985   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
986   return sampleprof_error::success;
987 }
988 
readImpl()989 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
990   const uint8_t *BufStart =
991       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
992 
993   for (auto &Entry : SecHdrTable) {
994     // Skip empty section.
995     if (!Entry.Size)
996       continue;
997 
998     // Skip sections without context when SkipFlatProf is true.
999     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1000       continue;
1001 
1002     const uint8_t *SecStart = BufStart + Entry.Offset;
1003     uint64_t SecSize = Entry.Size;
1004 
1005     // If the section is compressed, decompress it into a buffer
1006     // DecompressBuf before reading the actual data. The pointee of
1007     // 'Data' will be changed to buffer hold by DecompressBuf
1008     // temporarily when reading the actual data.
1009     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
1010     if (isCompressed) {
1011       const uint8_t *DecompressBuf;
1012       uint64_t DecompressBufSize;
1013       if (std::error_code EC = decompressSection(
1014               SecStart, SecSize, DecompressBuf, DecompressBufSize))
1015         return EC;
1016       SecStart = DecompressBuf;
1017       SecSize = DecompressBufSize;
1018     }
1019 
1020     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
1021       return EC;
1022     if (Data != SecStart + SecSize)
1023       return sampleprof_error::malformed;
1024 
1025     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
1026     if (isCompressed) {
1027       Data = BufStart + Entry.Offset;
1028       End = BufStart + Buffer->getBufferSize();
1029     }
1030   }
1031 
1032   return sampleprof_error::success;
1033 }
1034 
verifySPMagic(uint64_t Magic)1035 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
1036   if (Magic == SPMagic())
1037     return sampleprof_error::success;
1038   return sampleprof_error::bad_magic;
1039 }
1040 
verifySPMagic(uint64_t Magic)1041 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
1042   if (Magic == SPMagic(SPF_Ext_Binary))
1043     return sampleprof_error::success;
1044   return sampleprof_error::bad_magic;
1045 }
1046 
readNameTable()1047 std::error_code SampleProfileReaderBinary::readNameTable() {
1048   auto Size = readNumber<size_t>();
1049   if (std::error_code EC = Size.getError())
1050     return EC;
1051 
1052   // Normally if useMD5 is true, the name table should have MD5 values, not
1053   // strings, however in the case that ExtBinary profile has multiple name
1054   // tables mixing string and MD5, all of them have to be normalized to use MD5,
1055   // because optimization passes can only handle either type.
1056   bool UseMD5 = useMD5();
1057 
1058   NameTable.clear();
1059   NameTable.reserve(*Size);
1060   if (!ProfileIsCS) {
1061     MD5SampleContextTable.clear();
1062     if (UseMD5)
1063       MD5SampleContextTable.reserve(*Size);
1064     else
1065       // If we are using strings, delay MD5 computation since only a portion of
1066       // names are used by top level functions. Use 0 to indicate MD5 value is
1067       // to be calculated as no known string has a MD5 value of 0.
1068       MD5SampleContextTable.resize(*Size);
1069   }
1070   for (size_t I = 0; I < *Size; ++I) {
1071     auto Name(readString());
1072     if (std::error_code EC = Name.getError())
1073       return EC;
1074     if (UseMD5) {
1075       FunctionId FID(*Name);
1076       if (!ProfileIsCS)
1077         MD5SampleContextTable.emplace_back(FID.getHashCode());
1078       NameTable.emplace_back(FID);
1079     } else
1080       NameTable.push_back(FunctionId(*Name));
1081   }
1082   if (!ProfileIsCS)
1083     MD5SampleContextStart = MD5SampleContextTable.data();
1084   return sampleprof_error::success;
1085 }
1086 
1087 std::error_code
readNameTableSec(bool IsMD5,bool FixedLengthMD5)1088 SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
1089                                                    bool FixedLengthMD5) {
1090   if (FixedLengthMD5) {
1091     if (!IsMD5)
1092       errs() << "If FixedLengthMD5 is true, UseMD5 has to be true";
1093     auto Size = readNumber<size_t>();
1094     if (std::error_code EC = Size.getError())
1095       return EC;
1096 
1097     assert(Data + (*Size) * sizeof(uint64_t) == End &&
1098            "Fixed length MD5 name table does not contain specified number of "
1099            "entries");
1100     if (Data + (*Size) * sizeof(uint64_t) > End)
1101       return sampleprof_error::truncated;
1102 
1103     NameTable.clear();
1104     NameTable.reserve(*Size);
1105     for (size_t I = 0; I < *Size; ++I) {
1106       using namespace support;
1107       uint64_t FID = endian::read<uint64_t, endianness::little, unaligned>(
1108           Data + I * sizeof(uint64_t));
1109       NameTable.emplace_back(FunctionId(FID));
1110     }
1111     if (!ProfileIsCS)
1112       MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data);
1113     Data = Data + (*Size) * sizeof(uint64_t);
1114     return sampleprof_error::success;
1115   }
1116 
1117   if (IsMD5) {
1118     assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here");
1119     auto Size = readNumber<size_t>();
1120     if (std::error_code EC = Size.getError())
1121       return EC;
1122 
1123     NameTable.clear();
1124     NameTable.reserve(*Size);
1125     if (!ProfileIsCS)
1126       MD5SampleContextTable.resize(*Size);
1127     for (size_t I = 0; I < *Size; ++I) {
1128       auto FID = readNumber<uint64_t>();
1129       if (std::error_code EC = FID.getError())
1130         return EC;
1131       if (!ProfileIsCS)
1132         support::endian::write64le(&MD5SampleContextTable[I], *FID);
1133       NameTable.emplace_back(FunctionId(*FID));
1134     }
1135     if (!ProfileIsCS)
1136       MD5SampleContextStart = MD5SampleContextTable.data();
1137     return sampleprof_error::success;
1138   }
1139 
1140   return SampleProfileReaderBinary::readNameTable();
1141 }
1142 
1143 // Read in the CS name table section, which basically contains a list of context
1144 // vectors. Each element of a context vector, aka a frame, refers to the
1145 // underlying raw function names that are stored in the name table, as well as
1146 // a callsite identifier that only makes sense for non-leaf frames.
readCSNameTableSec()1147 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1148   auto Size = readNumber<size_t>();
1149   if (std::error_code EC = Size.getError())
1150     return EC;
1151 
1152   CSNameTable.clear();
1153   CSNameTable.reserve(*Size);
1154   if (ProfileIsCS) {
1155     // Delay MD5 computation of CS context until they are needed. Use 0 to
1156     // indicate MD5 value is to be calculated as no known string has a MD5
1157     // value of 0.
1158     MD5SampleContextTable.clear();
1159     MD5SampleContextTable.resize(*Size);
1160     MD5SampleContextStart = MD5SampleContextTable.data();
1161   }
1162   for (size_t I = 0; I < *Size; ++I) {
1163     CSNameTable.emplace_back(SampleContextFrameVector());
1164     auto ContextSize = readNumber<uint32_t>();
1165     if (std::error_code EC = ContextSize.getError())
1166       return EC;
1167     for (uint32_t J = 0; J < *ContextSize; ++J) {
1168       auto FName(readStringFromTable());
1169       if (std::error_code EC = FName.getError())
1170         return EC;
1171       auto LineOffset = readNumber<uint64_t>();
1172       if (std::error_code EC = LineOffset.getError())
1173         return EC;
1174 
1175       if (!isOffsetLegal(*LineOffset))
1176         return std::error_code();
1177 
1178       auto Discriminator = readNumber<uint64_t>();
1179       if (std::error_code EC = Discriminator.getError())
1180         return EC;
1181 
1182       CSNameTable.back().emplace_back(
1183           FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
1184     }
1185   }
1186 
1187   return sampleprof_error::success;
1188 }
1189 
1190 std::error_code
readFuncMetadata(bool ProfileHasAttribute,FunctionSamples * FProfile)1191 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
1192                                                    FunctionSamples *FProfile) {
1193   if (Data < End) {
1194     if (ProfileIsProbeBased) {
1195       auto Checksum = readNumber<uint64_t>();
1196       if (std::error_code EC = Checksum.getError())
1197         return EC;
1198       if (FProfile)
1199         FProfile->setFunctionHash(*Checksum);
1200     }
1201 
1202     if (ProfileHasAttribute) {
1203       auto Attributes = readNumber<uint32_t>();
1204       if (std::error_code EC = Attributes.getError())
1205         return EC;
1206       if (FProfile)
1207         FProfile->getContext().setAllAttributes(*Attributes);
1208     }
1209 
1210     if (!ProfileIsCS) {
1211       // Read all the attributes for inlined function calls.
1212       auto NumCallsites = readNumber<uint32_t>();
1213       if (std::error_code EC = NumCallsites.getError())
1214         return EC;
1215 
1216       for (uint32_t J = 0; J < *NumCallsites; ++J) {
1217         auto LineOffset = readNumber<uint64_t>();
1218         if (std::error_code EC = LineOffset.getError())
1219           return EC;
1220 
1221         auto Discriminator = readNumber<uint64_t>();
1222         if (std::error_code EC = Discriminator.getError())
1223           return EC;
1224 
1225         auto FContextHash(readSampleContextFromTable());
1226         if (std::error_code EC = FContextHash.getError())
1227           return EC;
1228 
1229         auto &[FContext, Hash] = *FContextHash;
1230         FunctionSamples *CalleeProfile = nullptr;
1231         if (FProfile) {
1232           CalleeProfile = const_cast<FunctionSamples *>(
1233               &FProfile->functionSamplesAt(LineLocation(
1234                   *LineOffset,
1235                   *Discriminator))[FContext.getFunction()]);
1236         }
1237         if (std::error_code EC =
1238                 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
1239           return EC;
1240       }
1241     }
1242   }
1243 
1244   return sampleprof_error::success;
1245 }
1246 
1247 std::error_code
readFuncMetadata(bool ProfileHasAttribute)1248 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
1249   while (Data < End) {
1250     auto FContextHash(readSampleContextFromTable());
1251     if (std::error_code EC = FContextHash.getError())
1252       return EC;
1253     auto &[FContext, Hash] = *FContextHash;
1254     FunctionSamples *FProfile = nullptr;
1255     auto It = Profiles.find(FContext);
1256     if (It != Profiles.end())
1257       FProfile = &It->second;
1258 
1259     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
1260       return EC;
1261   }
1262 
1263   assert(Data == End && "More data is read than expected");
1264   return sampleprof_error::success;
1265 }
1266 
1267 std::error_code
readSecHdrTableEntry(uint64_t Idx)1268 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) {
1269   SecHdrTableEntry Entry;
1270   auto Type = readUnencodedNumber<uint64_t>();
1271   if (std::error_code EC = Type.getError())
1272     return EC;
1273   Entry.Type = static_cast<SecType>(*Type);
1274 
1275   auto Flags = readUnencodedNumber<uint64_t>();
1276   if (std::error_code EC = Flags.getError())
1277     return EC;
1278   Entry.Flags = *Flags;
1279 
1280   auto Offset = readUnencodedNumber<uint64_t>();
1281   if (std::error_code EC = Offset.getError())
1282     return EC;
1283   Entry.Offset = *Offset;
1284 
1285   auto Size = readUnencodedNumber<uint64_t>();
1286   if (std::error_code EC = Size.getError())
1287     return EC;
1288   Entry.Size = *Size;
1289 
1290   Entry.LayoutIndex = Idx;
1291   SecHdrTable.push_back(std::move(Entry));
1292   return sampleprof_error::success;
1293 }
1294 
readSecHdrTable()1295 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1296   auto EntryNum = readUnencodedNumber<uint64_t>();
1297   if (std::error_code EC = EntryNum.getError())
1298     return EC;
1299 
1300   for (uint64_t i = 0; i < (*EntryNum); i++)
1301     if (std::error_code EC = readSecHdrTableEntry(i))
1302       return EC;
1303 
1304   return sampleprof_error::success;
1305 }
1306 
readHeader()1307 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1308   const uint8_t *BufStart =
1309       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1310   Data = BufStart;
1311   End = BufStart + Buffer->getBufferSize();
1312 
1313   if (std::error_code EC = readMagicIdent())
1314     return EC;
1315 
1316   if (std::error_code EC = readSecHdrTable())
1317     return EC;
1318 
1319   return sampleprof_error::success;
1320 }
1321 
getSectionSize(SecType Type)1322 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1323   uint64_t Size = 0;
1324   for (auto &Entry : SecHdrTable) {
1325     if (Entry.Type == Type)
1326       Size += Entry.Size;
1327   }
1328   return Size;
1329 }
1330 
getFileSize()1331 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1332   // Sections in SecHdrTable is not necessarily in the same order as
1333   // sections in the profile because section like FuncOffsetTable needs
1334   // to be written after section LBRProfile but needs to be read before
1335   // section LBRProfile, so we cannot simply use the last entry in
1336   // SecHdrTable to calculate the file size.
1337   uint64_t FileSize = 0;
1338   for (auto &Entry : SecHdrTable) {
1339     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1340   }
1341   return FileSize;
1342 }
1343 
getSecFlagsStr(const SecHdrTableEntry & Entry)1344 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1345   std::string Flags;
1346   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1347     Flags.append("{compressed,");
1348   else
1349     Flags.append("{");
1350 
1351   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1352     Flags.append("flat,");
1353 
1354   switch (Entry.Type) {
1355   case SecNameTable:
1356     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1357       Flags.append("fixlenmd5,");
1358     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1359       Flags.append("md5,");
1360     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1361       Flags.append("uniq,");
1362     break;
1363   case SecProfSummary:
1364     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1365       Flags.append("partial,");
1366     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1367       Flags.append("context,");
1368     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
1369       Flags.append("preInlined,");
1370     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1371       Flags.append("fs-discriminator,");
1372     break;
1373   case SecFuncOffsetTable:
1374     if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
1375       Flags.append("ordered,");
1376     break;
1377   case SecFuncMetadata:
1378     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
1379       Flags.append("probe,");
1380     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
1381       Flags.append("attr,");
1382     break;
1383   default:
1384     break;
1385   }
1386   char &last = Flags.back();
1387   if (last == ',')
1388     last = '}';
1389   else
1390     Flags.append("}");
1391   return Flags;
1392 }
1393 
dumpSectionInfo(raw_ostream & OS)1394 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1395   uint64_t TotalSecsSize = 0;
1396   for (auto &Entry : SecHdrTable) {
1397     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1398        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1399        << "\n";
1400     ;
1401     TotalSecsSize += Entry.Size;
1402   }
1403   uint64_t HeaderSize = SecHdrTable.front().Offset;
1404   assert(HeaderSize + TotalSecsSize == getFileSize() &&
1405          "Size of 'header + sections' doesn't match the total size of profile");
1406 
1407   OS << "Header Size: " << HeaderSize << "\n";
1408   OS << "Total Sections Size: " << TotalSecsSize << "\n";
1409   OS << "File Size: " << getFileSize() << "\n";
1410   return true;
1411 }
1412 
readMagicIdent()1413 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1414   // Read and check the magic identifier.
1415   auto Magic = readNumber<uint64_t>();
1416   if (std::error_code EC = Magic.getError())
1417     return EC;
1418   else if (std::error_code EC = verifySPMagic(*Magic))
1419     return EC;
1420 
1421   // Read the version number.
1422   auto Version = readNumber<uint64_t>();
1423   if (std::error_code EC = Version.getError())
1424     return EC;
1425   else if (*Version != SPVersion())
1426     return sampleprof_error::unsupported_version;
1427 
1428   return sampleprof_error::success;
1429 }
1430 
readHeader()1431 std::error_code SampleProfileReaderBinary::readHeader() {
1432   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1433   End = Data + Buffer->getBufferSize();
1434 
1435   if (std::error_code EC = readMagicIdent())
1436     return EC;
1437 
1438   if (std::error_code EC = readSummary())
1439     return EC;
1440 
1441   if (std::error_code EC = readNameTable())
1442     return EC;
1443   return sampleprof_error::success;
1444 }
1445 
readSummaryEntry(std::vector<ProfileSummaryEntry> & Entries)1446 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1447     std::vector<ProfileSummaryEntry> &Entries) {
1448   auto Cutoff = readNumber<uint64_t>();
1449   if (std::error_code EC = Cutoff.getError())
1450     return EC;
1451 
1452   auto MinBlockCount = readNumber<uint64_t>();
1453   if (std::error_code EC = MinBlockCount.getError())
1454     return EC;
1455 
1456   auto NumBlocks = readNumber<uint64_t>();
1457   if (std::error_code EC = NumBlocks.getError())
1458     return EC;
1459 
1460   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1461   return sampleprof_error::success;
1462 }
1463 
readSummary()1464 std::error_code SampleProfileReaderBinary::readSummary() {
1465   auto TotalCount = readNumber<uint64_t>();
1466   if (std::error_code EC = TotalCount.getError())
1467     return EC;
1468 
1469   auto MaxBlockCount = readNumber<uint64_t>();
1470   if (std::error_code EC = MaxBlockCount.getError())
1471     return EC;
1472 
1473   auto MaxFunctionCount = readNumber<uint64_t>();
1474   if (std::error_code EC = MaxFunctionCount.getError())
1475     return EC;
1476 
1477   auto NumBlocks = readNumber<uint64_t>();
1478   if (std::error_code EC = NumBlocks.getError())
1479     return EC;
1480 
1481   auto NumFunctions = readNumber<uint64_t>();
1482   if (std::error_code EC = NumFunctions.getError())
1483     return EC;
1484 
1485   auto NumSummaryEntries = readNumber<uint64_t>();
1486   if (std::error_code EC = NumSummaryEntries.getError())
1487     return EC;
1488 
1489   std::vector<ProfileSummaryEntry> Entries;
1490   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1491     std::error_code EC = readSummaryEntry(Entries);
1492     if (EC != sampleprof_error::success)
1493       return EC;
1494   }
1495   Summary = std::make_unique<ProfileSummary>(
1496       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1497       *MaxFunctionCount, *NumBlocks, *NumFunctions);
1498 
1499   return sampleprof_error::success;
1500 }
1501 
hasFormat(const MemoryBuffer & Buffer)1502 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1503   const uint8_t *Data =
1504       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1505   uint64_t Magic = decodeULEB128(Data);
1506   return Magic == SPMagic();
1507 }
1508 
hasFormat(const MemoryBuffer & Buffer)1509 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1510   const uint8_t *Data =
1511       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1512   uint64_t Magic = decodeULEB128(Data);
1513   return Magic == SPMagic(SPF_Ext_Binary);
1514 }
1515 
skipNextWord()1516 std::error_code SampleProfileReaderGCC::skipNextWord() {
1517   uint32_t dummy;
1518   if (!GcovBuffer.readInt(dummy))
1519     return sampleprof_error::truncated;
1520   return sampleprof_error::success;
1521 }
1522 
readNumber()1523 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1524   if (sizeof(T) <= sizeof(uint32_t)) {
1525     uint32_t Val;
1526     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1527       return static_cast<T>(Val);
1528   } else if (sizeof(T) <= sizeof(uint64_t)) {
1529     uint64_t Val;
1530     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1531       return static_cast<T>(Val);
1532   }
1533 
1534   std::error_code EC = sampleprof_error::malformed;
1535   reportError(0, EC.message());
1536   return EC;
1537 }
1538 
readString()1539 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1540   StringRef Str;
1541   if (!GcovBuffer.readString(Str))
1542     return sampleprof_error::truncated;
1543   return Str;
1544 }
1545 
readHeader()1546 std::error_code SampleProfileReaderGCC::readHeader() {
1547   // Read the magic identifier.
1548   if (!GcovBuffer.readGCDAFormat())
1549     return sampleprof_error::unrecognized_format;
1550 
1551   // Read the version number. Note - the GCC reader does not validate this
1552   // version, but the profile creator generates v704.
1553   GCOV::GCOVVersion version;
1554   if (!GcovBuffer.readGCOVVersion(version))
1555     return sampleprof_error::unrecognized_format;
1556 
1557   if (version != GCOV::V407)
1558     return sampleprof_error::unsupported_version;
1559 
1560   // Skip the empty integer.
1561   if (std::error_code EC = skipNextWord())
1562     return EC;
1563 
1564   return sampleprof_error::success;
1565 }
1566 
readSectionTag(uint32_t Expected)1567 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1568   uint32_t Tag;
1569   if (!GcovBuffer.readInt(Tag))
1570     return sampleprof_error::truncated;
1571 
1572   if (Tag != Expected)
1573     return sampleprof_error::malformed;
1574 
1575   if (std::error_code EC = skipNextWord())
1576     return EC;
1577 
1578   return sampleprof_error::success;
1579 }
1580 
readNameTable()1581 std::error_code SampleProfileReaderGCC::readNameTable() {
1582   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1583     return EC;
1584 
1585   uint32_t Size;
1586   if (!GcovBuffer.readInt(Size))
1587     return sampleprof_error::truncated;
1588 
1589   for (uint32_t I = 0; I < Size; ++I) {
1590     StringRef Str;
1591     if (!GcovBuffer.readString(Str))
1592       return sampleprof_error::truncated;
1593     Names.push_back(std::string(Str));
1594   }
1595 
1596   return sampleprof_error::success;
1597 }
1598 
readFunctionProfiles()1599 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1600   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1601     return EC;
1602 
1603   uint32_t NumFunctions;
1604   if (!GcovBuffer.readInt(NumFunctions))
1605     return sampleprof_error::truncated;
1606 
1607   InlineCallStack Stack;
1608   for (uint32_t I = 0; I < NumFunctions; ++I)
1609     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1610       return EC;
1611 
1612   computeSummary();
1613   return sampleprof_error::success;
1614 }
1615 
readOneFunctionProfile(const InlineCallStack & InlineStack,bool Update,uint32_t Offset)1616 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1617     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1618   uint64_t HeadCount = 0;
1619   if (InlineStack.size() == 0)
1620     if (!GcovBuffer.readInt64(HeadCount))
1621       return sampleprof_error::truncated;
1622 
1623   uint32_t NameIdx;
1624   if (!GcovBuffer.readInt(NameIdx))
1625     return sampleprof_error::truncated;
1626 
1627   StringRef Name(Names[NameIdx]);
1628 
1629   uint32_t NumPosCounts;
1630   if (!GcovBuffer.readInt(NumPosCounts))
1631     return sampleprof_error::truncated;
1632 
1633   uint32_t NumCallsites;
1634   if (!GcovBuffer.readInt(NumCallsites))
1635     return sampleprof_error::truncated;
1636 
1637   FunctionSamples *FProfile = nullptr;
1638   if (InlineStack.size() == 0) {
1639     // If this is a top function that we have already processed, do not
1640     // update its profile again.  This happens in the presence of
1641     // function aliases.  Since these aliases share the same function
1642     // body, there will be identical replicated profiles for the
1643     // original function.  In this case, we simply not bother updating
1644     // the profile of the original function.
1645     FProfile = &Profiles[FunctionId(Name)];
1646     FProfile->addHeadSamples(HeadCount);
1647     if (FProfile->getTotalSamples() > 0)
1648       Update = false;
1649   } else {
1650     // Otherwise, we are reading an inlined instance. The top of the
1651     // inline stack contains the profile of the caller. Insert this
1652     // callee in the caller's CallsiteMap.
1653     FunctionSamples *CallerProfile = InlineStack.front();
1654     uint32_t LineOffset = Offset >> 16;
1655     uint32_t Discriminator = Offset & 0xffff;
1656     FProfile = &CallerProfile->functionSamplesAt(
1657         LineLocation(LineOffset, Discriminator))[FunctionId(Name)];
1658   }
1659   FProfile->setFunction(FunctionId(Name));
1660 
1661   for (uint32_t I = 0; I < NumPosCounts; ++I) {
1662     uint32_t Offset;
1663     if (!GcovBuffer.readInt(Offset))
1664       return sampleprof_error::truncated;
1665 
1666     uint32_t NumTargets;
1667     if (!GcovBuffer.readInt(NumTargets))
1668       return sampleprof_error::truncated;
1669 
1670     uint64_t Count;
1671     if (!GcovBuffer.readInt64(Count))
1672       return sampleprof_error::truncated;
1673 
1674     // The line location is encoded in the offset as:
1675     //   high 16 bits: line offset to the start of the function.
1676     //   low 16 bits: discriminator.
1677     uint32_t LineOffset = Offset >> 16;
1678     uint32_t Discriminator = Offset & 0xffff;
1679 
1680     InlineCallStack NewStack;
1681     NewStack.push_back(FProfile);
1682     llvm::append_range(NewStack, InlineStack);
1683     if (Update) {
1684       // Walk up the inline stack, adding the samples on this line to
1685       // the total sample count of the callers in the chain.
1686       for (auto *CallerProfile : NewStack)
1687         CallerProfile->addTotalSamples(Count);
1688 
1689       // Update the body samples for the current profile.
1690       FProfile->addBodySamples(LineOffset, Discriminator, Count);
1691     }
1692 
1693     // Process the list of functions called at an indirect call site.
1694     // These are all the targets that a function pointer (or virtual
1695     // function) resolved at runtime.
1696     for (uint32_t J = 0; J < NumTargets; J++) {
1697       uint32_t HistVal;
1698       if (!GcovBuffer.readInt(HistVal))
1699         return sampleprof_error::truncated;
1700 
1701       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1702         return sampleprof_error::malformed;
1703 
1704       uint64_t TargetIdx;
1705       if (!GcovBuffer.readInt64(TargetIdx))
1706         return sampleprof_error::truncated;
1707       StringRef TargetName(Names[TargetIdx]);
1708 
1709       uint64_t TargetCount;
1710       if (!GcovBuffer.readInt64(TargetCount))
1711         return sampleprof_error::truncated;
1712 
1713       if (Update)
1714         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1715                                          FunctionId(TargetName),
1716                                          TargetCount);
1717     }
1718   }
1719 
1720   // Process all the inlined callers into the current function. These
1721   // are all the callsites that were inlined into this function.
1722   for (uint32_t I = 0; I < NumCallsites; I++) {
1723     // The offset is encoded as:
1724     //   high 16 bits: line offset to the start of the function.
1725     //   low 16 bits: discriminator.
1726     uint32_t Offset;
1727     if (!GcovBuffer.readInt(Offset))
1728       return sampleprof_error::truncated;
1729     InlineCallStack NewStack;
1730     NewStack.push_back(FProfile);
1731     llvm::append_range(NewStack, InlineStack);
1732     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1733       return EC;
1734   }
1735 
1736   return sampleprof_error::success;
1737 }
1738 
1739 /// Read a GCC AutoFDO profile.
1740 ///
1741 /// This format is generated by the Linux Perf conversion tool at
1742 /// https://github.com/google/autofdo.
readImpl()1743 std::error_code SampleProfileReaderGCC::readImpl() {
1744   assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1745   // Read the string table.
1746   if (std::error_code EC = readNameTable())
1747     return EC;
1748 
1749   // Read the source profile.
1750   if (std::error_code EC = readFunctionProfiles())
1751     return EC;
1752 
1753   return sampleprof_error::success;
1754 }
1755 
hasFormat(const MemoryBuffer & Buffer)1756 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1757   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1758   return Magic == "adcg*704";
1759 }
1760 
applyRemapping(LLVMContext & Ctx)1761 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1762   // If the reader uses MD5 to represent string, we can't remap it because
1763   // we don't know what the original function names were.
1764   if (Reader.useMD5()) {
1765     Ctx.diagnose(DiagnosticInfoSampleProfile(
1766         Reader.getBuffer()->getBufferIdentifier(),
1767         "Profile data remapping cannot be applied to profile data "
1768         "using MD5 names (original mangled names are not available).",
1769         DS_Warning));
1770     return;
1771   }
1772 
1773   // CSSPGO-TODO: Remapper is not yet supported.
1774   // We will need to remap the entire context string.
1775   assert(Remappings && "should be initialized while creating remapper");
1776   for (auto &Sample : Reader.getProfiles()) {
1777     DenseSet<FunctionId> NamesInSample;
1778     Sample.second.findAllNames(NamesInSample);
1779     for (auto &Name : NamesInSample) {
1780       StringRef NameStr = Name.stringRef();
1781       if (auto Key = Remappings->insert(NameStr))
1782         NameMap.insert({Key, NameStr});
1783     }
1784   }
1785 
1786   RemappingApplied = true;
1787 }
1788 
1789 std::optional<StringRef>
lookUpNameInProfile(StringRef Fname)1790 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1791   if (auto Key = Remappings->lookup(Fname)) {
1792     StringRef Result = NameMap.lookup(Key);
1793     if (!Result.empty())
1794       return Result;
1795   }
1796   return std::nullopt;
1797 }
1798 
1799 /// Prepare a memory buffer for the contents of \p Filename.
1800 ///
1801 /// \returns an error code indicating the status of the buffer.
1802 static ErrorOr<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename,vfs::FileSystem & FS)1803 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
1804   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
1805                                            : FS.getBufferForFile(Filename);
1806   if (std::error_code EC = BufferOrErr.getError())
1807     return EC;
1808   auto Buffer = std::move(BufferOrErr.get());
1809 
1810   return std::move(Buffer);
1811 }
1812 
1813 /// Create a sample profile reader based on the format of the input file.
1814 ///
1815 /// \param Filename The file to open.
1816 ///
1817 /// \param C The LLVM context to use to emit diagnostics.
1818 ///
1819 /// \param P The FSDiscriminatorPass.
1820 ///
1821 /// \param RemapFilename The file used for profile remapping.
1822 ///
1823 /// \returns an error code indicating the status of the created reader.
1824 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const std::string Filename,LLVMContext & C,vfs::FileSystem & FS,FSDiscriminatorPass P,const std::string RemapFilename)1825 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1826                             vfs::FileSystem &FS, FSDiscriminatorPass P,
1827                             const std::string RemapFilename) {
1828   auto BufferOrError = setupMemoryBuffer(Filename, FS);
1829   if (std::error_code EC = BufferOrError.getError())
1830     return EC;
1831   return create(BufferOrError.get(), C, FS, P, RemapFilename);
1832 }
1833 
1834 /// Create a sample profile remapper from the given input, to remap the
1835 /// function names in the given profile data.
1836 ///
1837 /// \param Filename The file to open.
1838 ///
1839 /// \param Reader The profile reader the remapper is going to be applied to.
1840 ///
1841 /// \param C The LLVM context to use to emit diagnostics.
1842 ///
1843 /// \returns an error code indicating the status of the created reader.
1844 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(const std::string Filename,vfs::FileSystem & FS,SampleProfileReader & Reader,LLVMContext & C)1845 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1846                                            vfs::FileSystem &FS,
1847                                            SampleProfileReader &Reader,
1848                                            LLVMContext &C) {
1849   auto BufferOrError = setupMemoryBuffer(Filename, FS);
1850   if (std::error_code EC = BufferOrError.getError())
1851     return EC;
1852   return create(BufferOrError.get(), Reader, C);
1853 }
1854 
1855 /// Create a sample profile remapper from the given input, to remap the
1856 /// function names in the given profile data.
1857 ///
1858 /// \param B The memory buffer to create the reader from (assumes ownership).
1859 ///
1860 /// \param C The LLVM context to use to emit diagnostics.
1861 ///
1862 /// \param Reader The profile reader the remapper is going to be applied to.
1863 ///
1864 /// \returns an error code indicating the status of the created reader.
1865 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(std::unique_ptr<MemoryBuffer> & B,SampleProfileReader & Reader,LLVMContext & C)1866 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1867                                            SampleProfileReader &Reader,
1868                                            LLVMContext &C) {
1869   auto Remappings = std::make_unique<SymbolRemappingReader>();
1870   if (Error E = Remappings->read(*B)) {
1871     handleAllErrors(
1872         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1873           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1874                                                  ParseError.getLineNum(),
1875                                                  ParseError.getMessage()));
1876         });
1877     return sampleprof_error::malformed;
1878   }
1879 
1880   return std::make_unique<SampleProfileReaderItaniumRemapper>(
1881       std::move(B), std::move(Remappings), Reader);
1882 }
1883 
1884 /// Create a sample profile reader based on the format of the input data.
1885 ///
1886 /// \param B The memory buffer to create the reader from (assumes ownership).
1887 ///
1888 /// \param C The LLVM context to use to emit diagnostics.
1889 ///
1890 /// \param P The FSDiscriminatorPass.
1891 ///
1892 /// \param RemapFilename The file used for profile remapping.
1893 ///
1894 /// \returns an error code indicating the status of the created reader.
1895 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(std::unique_ptr<MemoryBuffer> & B,LLVMContext & C,vfs::FileSystem & FS,FSDiscriminatorPass P,const std::string RemapFilename)1896 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1897                             vfs::FileSystem &FS, FSDiscriminatorPass P,
1898                             const std::string RemapFilename) {
1899   std::unique_ptr<SampleProfileReader> Reader;
1900   if (SampleProfileReaderRawBinary::hasFormat(*B))
1901     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1902   else if (SampleProfileReaderExtBinary::hasFormat(*B))
1903     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1904   else if (SampleProfileReaderGCC::hasFormat(*B))
1905     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1906   else if (SampleProfileReaderText::hasFormat(*B))
1907     Reader.reset(new SampleProfileReaderText(std::move(B), C));
1908   else
1909     return sampleprof_error::unrecognized_format;
1910 
1911   if (!RemapFilename.empty()) {
1912     auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
1913         RemapFilename, FS, *Reader, C);
1914     if (std::error_code EC = ReaderOrErr.getError()) {
1915       std::string Msg = "Could not create remapper: " + EC.message();
1916       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1917       return EC;
1918     }
1919     Reader->Remapper = std::move(ReaderOrErr.get());
1920   }
1921 
1922   if (std::error_code EC = Reader->readHeader()) {
1923     return EC;
1924   }
1925 
1926   Reader->setDiscriminatorMaskedBitFrom(P);
1927 
1928   return std::move(Reader);
1929 }
1930 
1931 // For text and GCC file formats, we compute the summary after reading the
1932 // profile. Binary format has the profile summary in its header.
computeSummary()1933 void SampleProfileReader::computeSummary() {
1934   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1935   Summary = Builder.computeSummaryForProfiles(Profiles);
1936 }
1937