1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <system_error>
#include <utility>
#include <vector>
43
44 using namespace llvm;
45 using namespace sampleprof;
46
47 /// Dump the function profile for \p FName.
48 ///
49 /// \param FName Name of the function to print.
50 /// \param OS Stream to emit the output to.
dumpFunctionProfile(StringRef FName,raw_ostream & OS)51 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
52 raw_ostream &OS) {
53 OS << "Function: " << FName << ": " << Profiles[FName];
54 }
55
56 /// Dump all the function profiles found on stream \p OS.
dump(raw_ostream & OS)57 void SampleProfileReader::dump(raw_ostream &OS) {
58 for (const auto &I : Profiles)
59 dumpFunctionProfile(I.getKey(), OS);
60 }
61
62 /// Parse \p Input as function head.
63 ///
64 /// Parse one line of \p Input, and update function name in \p FName,
65 /// function's total sample count in \p NumSamples, function's entry
66 /// count in \p NumHeadSamples.
67 ///
68 /// \returns true if parsing is successful.
ParseHead(const StringRef & Input,StringRef & FName,uint64_t & NumSamples,uint64_t & NumHeadSamples)69 static bool ParseHead(const StringRef &Input, StringRef &FName,
70 uint64_t &NumSamples, uint64_t &NumHeadSamples) {
71 if (Input[0] == ' ')
72 return false;
73 size_t n2 = Input.rfind(':');
74 size_t n1 = Input.rfind(':', n2 - 1);
75 FName = Input.substr(0, n1);
76 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
77 return false;
78 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
79 return false;
80 return true;
81 }
82
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
85
86 /// Parse \p Input that contains metadata.
87 /// Possible metadata:
88 /// - CFG Checksum information:
89 /// !CFGChecksum: 12345
90 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
parseMetadata(const StringRef & Input,uint64_t & FunctionHash)91 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) {
92 if (!Input.startswith("!CFGChecksum:"))
93 return false;
94
95 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
96 return !CFGInfo.getAsInteger(10, FunctionHash);
97 }
98
/// Kind of an indented (non-header) line in the text profile, as reported by
/// ParseLine().
enum class LineType {
  /// "offset[.discriminator]: callee:NUM" - an inlined call site.
  CallSiteProfile,
  /// "offset[.discriminator]: NUM [target:NUM]*" - samples for a body line.
  BodyProfile,
  /// A line starting with '!', e.g. "!CFGChecksum: NUM".
  Metadata,
};
104
105 /// Parse \p Input as line sample.
106 ///
107 /// \param Input input line.
108 /// \param LineTy Type of this line.
109 /// \param Depth the depth of the inline stack.
110 /// \param NumSamples total samples of the line/inlined callsite.
111 /// \param LineOffset line offset to the start of the function.
112 /// \param Discriminator discriminator of the line.
113 /// \param TargetCountMap map from indirect call target to count.
114 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
115 ///
116 /// returns true if parsing is successful.
ParseLine(const StringRef & Input,LineType & LineTy,uint32_t & Depth,uint64_t & NumSamples,uint32_t & LineOffset,uint32_t & Discriminator,StringRef & CalleeName,DenseMap<StringRef,uint64_t> & TargetCountMap,uint64_t & FunctionHash)117 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
118 uint64_t &NumSamples, uint32_t &LineOffset,
119 uint32_t &Discriminator, StringRef &CalleeName,
120 DenseMap<StringRef, uint64_t> &TargetCountMap,
121 uint64_t &FunctionHash) {
122 for (Depth = 0; Input[Depth] == ' '; Depth++)
123 ;
124 if (Depth == 0)
125 return false;
126
127 if (Depth == 1 && Input[Depth] == '!') {
128 LineTy = LineType::Metadata;
129 return parseMetadata(Input.substr(Depth), FunctionHash);
130 }
131
132 size_t n1 = Input.find(':');
133 StringRef Loc = Input.substr(Depth, n1 - Depth);
134 size_t n2 = Loc.find('.');
135 if (n2 == StringRef::npos) {
136 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
137 return false;
138 Discriminator = 0;
139 } else {
140 if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
141 return false;
142 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
143 return false;
144 }
145
146 StringRef Rest = Input.substr(n1 + 2);
147 if (isDigit(Rest[0])) {
148 LineTy = LineType::BodyProfile;
149 size_t n3 = Rest.find(' ');
150 if (n3 == StringRef::npos) {
151 if (Rest.getAsInteger(10, NumSamples))
152 return false;
153 } else {
154 if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
155 return false;
156 }
157 // Find call targets and their sample counts.
158 // Note: In some cases, there are symbols in the profile which are not
159 // mangled. To accommodate such cases, use colon + integer pairs as the
160 // anchor points.
161 // An example:
162 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
163 // ":1000" and ":437" are used as anchor points so the string above will
164 // be interpreted as
165 // target: _M_construct<char *>
166 // count: 1000
167 // target: string_view<std::allocator<char> >
168 // count: 437
169 while (n3 != StringRef::npos) {
170 n3 += Rest.substr(n3).find_first_not_of(' ');
171 Rest = Rest.substr(n3);
172 n3 = Rest.find_first_of(':');
173 if (n3 == StringRef::npos || n3 == 0)
174 return false;
175
176 StringRef Target;
177 uint64_t count, n4;
178 while (true) {
179 // Get the segment after the current colon.
180 StringRef AfterColon = Rest.substr(n3 + 1);
181 // Get the target symbol before the current colon.
182 Target = Rest.substr(0, n3);
183 // Check if the word after the current colon is an integer.
184 n4 = AfterColon.find_first_of(' ');
185 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
186 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
187 if (!WordAfterColon.getAsInteger(10, count))
188 break;
189
190 // Try to find the next colon.
191 uint64_t n5 = AfterColon.find_first_of(':');
192 if (n5 == StringRef::npos)
193 return false;
194 n3 += n5 + 1;
195 }
196
197 // An anchor point is found. Save the {target, count} pair
198 TargetCountMap[Target] = count;
199 if (n4 == Rest.size())
200 break;
201 // Change n3 to the next blank space after colon + integer pair.
202 n3 = n4;
203 }
204 } else {
205 LineTy = LineType::CallSiteProfile;
206 size_t n3 = Rest.find_last_of(':');
207 CalleeName = Rest.substr(0, n3);
208 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
209 return false;
210 }
211 return true;
212 }
213
214 /// Load samples from a text file.
215 ///
216 /// See the documentation at the top of the file for an explanation of
217 /// the expected format.
218 ///
219 /// \returns true if the file was loaded successfully, false otherwise.
readImpl()220 std::error_code SampleProfileReaderText::readImpl() {
221 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
222 sampleprof_error Result = sampleprof_error::success;
223
224 InlineCallStack InlineStack;
225 uint32_t ProbeProfileCount = 0;
226
227 // SeenMetadata tracks whether we have processed metadata for the current
228 // top-level function profile.
229 bool SeenMetadata = false;
230
231 for (; !LineIt.is_at_eof(); ++LineIt) {
232 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
233 continue;
234 // Read the header of each function.
235 //
236 // Note that for function identifiers we are actually expecting
237 // mangled names, but we may not always get them. This happens when
238 // the compiler decides not to emit the function (e.g., it was inlined
239 // and removed). In this case, the binary will not have the linkage
240 // name for the function, so the profiler will emit the function's
241 // unmangled name, which may contain characters like ':' and '>' in its
242 // name (member functions, templates, etc).
243 //
244 // The only requirement we place on the identifier, then, is that it
245 // should not begin with a number.
246 if ((*LineIt)[0] != ' ') {
247 uint64_t NumSamples, NumHeadSamples;
248 StringRef FName;
249 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
250 reportError(LineIt.line_number(),
251 "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
252 return sampleprof_error::malformed;
253 }
254 SeenMetadata = false;
255 SampleContext FContext(FName);
256 if (FContext.hasContext())
257 ++CSProfileCount;
258 Profiles[FContext] = FunctionSamples();
259 FunctionSamples &FProfile = Profiles[FContext];
260 FProfile.setName(FContext.getNameWithoutContext());
261 FProfile.setContext(FContext);
262 MergeResult(Result, FProfile.addTotalSamples(NumSamples));
263 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
264 InlineStack.clear();
265 InlineStack.push_back(&FProfile);
266 } else {
267 uint64_t NumSamples;
268 StringRef FName;
269 DenseMap<StringRef, uint64_t> TargetCountMap;
270 uint32_t Depth, LineOffset, Discriminator;
271 LineType LineTy;
272 uint64_t FunctionHash;
273 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
274 Discriminator, FName, TargetCountMap, FunctionHash)) {
275 reportError(LineIt.line_number(),
276 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
277 *LineIt);
278 return sampleprof_error::malformed;
279 }
280 if (SeenMetadata && LineTy != LineType::Metadata) {
281 // Metadata must be put at the end of a function profile.
282 reportError(LineIt.line_number(),
283 "Found non-metadata after metadata: " + *LineIt);
284 return sampleprof_error::malformed;
285 }
286 while (InlineStack.size() > Depth) {
287 InlineStack.pop_back();
288 }
289 switch (LineTy) {
290 case LineType::CallSiteProfile: {
291 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
292 LineLocation(LineOffset, Discriminator))[std::string(FName)];
293 FSamples.setName(FName);
294 MergeResult(Result, FSamples.addTotalSamples(NumSamples));
295 InlineStack.push_back(&FSamples);
296 break;
297 }
298 case LineType::BodyProfile: {
299 while (InlineStack.size() > Depth) {
300 InlineStack.pop_back();
301 }
302 FunctionSamples &FProfile = *InlineStack.back();
303 for (const auto &name_count : TargetCountMap) {
304 MergeResult(Result, FProfile.addCalledTargetSamples(
305 LineOffset, Discriminator, name_count.first,
306 name_count.second));
307 }
308 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
309 NumSamples));
310 break;
311 }
312 case LineType::Metadata: {
313 FunctionSamples &FProfile = *InlineStack.back();
314 FProfile.setFunctionHash(FunctionHash);
315 ++ProbeProfileCount;
316 SeenMetadata = true;
317 break;
318 }
319 }
320 }
321 }
322
323 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
324 "Cannot have both context-sensitive and regular profile");
325 ProfileIsCS = (CSProfileCount > 0);
326 assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
327 "Cannot have both probe-based profiles and regular profiles");
328 ProfileIsProbeBased = (ProbeProfileCount > 0);
329 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
330 FunctionSamples::ProfileIsCS = ProfileIsCS;
331
332 if (Result == sampleprof_error::success)
333 computeSummary();
334
335 return Result;
336 }
337
hasFormat(const MemoryBuffer & Buffer)338 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
339 bool result = false;
340
341 // Check that the first non-comment line is a valid function header.
342 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
343 if (!LineIt.is_at_eof()) {
344 if ((*LineIt)[0] != ' ') {
345 uint64_t NumSamples, NumHeadSamples;
346 StringRef FName;
347 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
348 }
349 }
350
351 return result;
352 }
353
readNumber()354 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
355 unsigned NumBytesRead = 0;
356 std::error_code EC;
357 uint64_t Val = decodeULEB128(Data, &NumBytesRead);
358
359 if (Val > std::numeric_limits<T>::max())
360 EC = sampleprof_error::malformed;
361 else if (Data + NumBytesRead > End)
362 EC = sampleprof_error::truncated;
363 else
364 EC = sampleprof_error::success;
365
366 if (EC) {
367 reportError(0, EC.message());
368 return EC;
369 }
370
371 Data += NumBytesRead;
372 return static_cast<T>(Val);
373 }
374
readString()375 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
376 std::error_code EC;
377 StringRef Str(reinterpret_cast<const char *>(Data));
378 if (Data + Str.size() + 1 > End) {
379 EC = sampleprof_error::truncated;
380 reportError(0, EC.message());
381 return EC;
382 }
383
384 Data += Str.size() + 1;
385 return Str;
386 }
387
388 template <typename T>
readUnencodedNumber()389 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
390 std::error_code EC;
391
392 if (Data + sizeof(T) > End) {
393 EC = sampleprof_error::truncated;
394 reportError(0, EC.message());
395 return EC;
396 }
397
398 using namespace support;
399 T Val = endian::readNext<T, little, unaligned>(Data);
400 return Val;
401 }
402
403 template <typename T>
readStringIndex(T & Table)404 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
405 std::error_code EC;
406 auto Idx = readNumber<uint32_t>();
407 if (std::error_code EC = Idx.getError())
408 return EC;
409 if (*Idx >= Table.size())
410 return sampleprof_error::truncated_name_table;
411 return *Idx;
412 }
413
readStringFromTable()414 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
415 auto Idx = readStringIndex(NameTable);
416 if (std::error_code EC = Idx.getError())
417 return EC;
418
419 return NameTable[*Idx];
420 }
421
readStringFromTable()422 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
423 if (!FixedLengthMD5)
424 return SampleProfileReaderBinary::readStringFromTable();
425
426 // read NameTable index.
427 auto Idx = readStringIndex(NameTable);
428 if (std::error_code EC = Idx.getError())
429 return EC;
430
431 // Check whether the name to be accessed has been accessed before,
432 // if not, read it from memory directly.
433 StringRef &SR = NameTable[*Idx];
434 if (SR.empty()) {
435 const uint8_t *SavedData = Data;
436 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
437 auto FID = readUnencodedNumber<uint64_t>();
438 if (std::error_code EC = FID.getError())
439 return EC;
440 // Save the string converted from uint64_t in MD5StringBuf. All the
441 // references to the name are all StringRefs refering to the string
442 // in MD5StringBuf.
443 MD5StringBuf->push_back(std::to_string(*FID));
444 SR = MD5StringBuf->back();
445 Data = SavedData;
446 }
447 return SR;
448 }
449
readStringFromTable()450 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
451 auto Idx = readStringIndex(NameTable);
452 if (std::error_code EC = Idx.getError())
453 return EC;
454
455 return StringRef(NameTable[*Idx]);
456 }
457
458 std::error_code
readProfile(FunctionSamples & FProfile)459 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
460 auto NumSamples = readNumber<uint64_t>();
461 if (std::error_code EC = NumSamples.getError())
462 return EC;
463 FProfile.addTotalSamples(*NumSamples);
464
465 // Read the samples in the body.
466 auto NumRecords = readNumber<uint32_t>();
467 if (std::error_code EC = NumRecords.getError())
468 return EC;
469
470 for (uint32_t I = 0; I < *NumRecords; ++I) {
471 auto LineOffset = readNumber<uint64_t>();
472 if (std::error_code EC = LineOffset.getError())
473 return EC;
474
475 if (!isOffsetLegal(*LineOffset)) {
476 return std::error_code();
477 }
478
479 auto Discriminator = readNumber<uint64_t>();
480 if (std::error_code EC = Discriminator.getError())
481 return EC;
482
483 auto NumSamples = readNumber<uint64_t>();
484 if (std::error_code EC = NumSamples.getError())
485 return EC;
486
487 auto NumCalls = readNumber<uint32_t>();
488 if (std::error_code EC = NumCalls.getError())
489 return EC;
490
491 for (uint32_t J = 0; J < *NumCalls; ++J) {
492 auto CalledFunction(readStringFromTable());
493 if (std::error_code EC = CalledFunction.getError())
494 return EC;
495
496 auto CalledFunctionSamples = readNumber<uint64_t>();
497 if (std::error_code EC = CalledFunctionSamples.getError())
498 return EC;
499
500 FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
501 *CalledFunction, *CalledFunctionSamples);
502 }
503
504 FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
505 }
506
507 // Read all the samples for inlined function calls.
508 auto NumCallsites = readNumber<uint32_t>();
509 if (std::error_code EC = NumCallsites.getError())
510 return EC;
511
512 for (uint32_t J = 0; J < *NumCallsites; ++J) {
513 auto LineOffset = readNumber<uint64_t>();
514 if (std::error_code EC = LineOffset.getError())
515 return EC;
516
517 auto Discriminator = readNumber<uint64_t>();
518 if (std::error_code EC = Discriminator.getError())
519 return EC;
520
521 auto FName(readStringFromTable());
522 if (std::error_code EC = FName.getError())
523 return EC;
524
525 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
526 LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
527 CalleeProfile.setName(*FName);
528 if (std::error_code EC = readProfile(CalleeProfile))
529 return EC;
530 }
531
532 return sampleprof_error::success;
533 }
534
535 std::error_code
readFuncProfile(const uint8_t * Start)536 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
537 Data = Start;
538 auto NumHeadSamples = readNumber<uint64_t>();
539 if (std::error_code EC = NumHeadSamples.getError())
540 return EC;
541
542 auto FName(readStringFromTable());
543 if (std::error_code EC = FName.getError())
544 return EC;
545
546 SampleContext FContext(*FName);
547 Profiles[FContext] = FunctionSamples();
548 FunctionSamples &FProfile = Profiles[FContext];
549 FProfile.setName(FContext.getNameWithoutContext());
550 FProfile.setContext(FContext);
551 FProfile.addHeadSamples(*NumHeadSamples);
552
553 if (FContext.hasContext())
554 CSProfileCount++;
555
556 if (std::error_code EC = readProfile(FProfile))
557 return EC;
558 return sampleprof_error::success;
559 }
560
readImpl()561 std::error_code SampleProfileReaderBinary::readImpl() {
562 while (!at_eof()) {
563 if (std::error_code EC = readFuncProfile(Data))
564 return EC;
565 }
566
567 return sampleprof_error::success;
568 }
569
readOneSection(const uint8_t * Start,uint64_t Size,const SecHdrTableEntry & Entry)570 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
571 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
572 Data = Start;
573 End = Start + Size;
574 switch (Entry.Type) {
575 case SecProfSummary:
576 if (std::error_code EC = readSummary())
577 return EC;
578 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
579 Summary->setPartialProfile(true);
580 break;
581 case SecNameTable: {
582 FixedLengthMD5 =
583 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
584 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
585 assert((!FixedLengthMD5 || UseMD5) &&
586 "If FixedLengthMD5 is true, UseMD5 has to be true");
587 if (std::error_code EC = readNameTableSec(UseMD5))
588 return EC;
589 break;
590 }
591 case SecLBRProfile:
592 if (std::error_code EC = readFuncProfiles())
593 return EC;
594 break;
595 case SecFuncOffsetTable:
596 if (std::error_code EC = readFuncOffsetTable())
597 return EC;
598 break;
599 case SecFuncMetadata:
600 ProfileIsProbeBased =
601 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
602 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
603 if (std::error_code EC = readFuncMetadata())
604 return EC;
605 break;
606 case SecProfileSymbolList:
607 if (std::error_code EC = readProfileSymbolList())
608 return EC;
609 break;
610 default:
611 if (std::error_code EC = readCustomSection(Entry))
612 return EC;
613 break;
614 }
615 return sampleprof_error::success;
616 }
617
collectFuncsFrom(const Module & M)618 void SampleProfileReaderExtBinaryBase::collectFuncsFrom(const Module &M) {
619 UseAllFuncs = false;
620 FuncsToUse.clear();
621 for (auto &F : M)
622 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
623 }
624
readFuncOffsetTable()625 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
626 // If there are more than one FuncOffsetTable, the profile read associated
627 // with previous FuncOffsetTable has to be done before next FuncOffsetTable
628 // is read.
629 FuncOffsetTable.clear();
630
631 auto Size = readNumber<uint64_t>();
632 if (std::error_code EC = Size.getError())
633 return EC;
634
635 FuncOffsetTable.reserve(*Size);
636 for (uint32_t I = 0; I < *Size; ++I) {
637 auto FName(readStringFromTable());
638 if (std::error_code EC = FName.getError())
639 return EC;
640
641 auto Offset = readNumber<uint64_t>();
642 if (std::error_code EC = Offset.getError())
643 return EC;
644
645 FuncOffsetTable[*FName] = *Offset;
646 }
647 return sampleprof_error::success;
648 }
649
readFuncProfiles()650 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
651 const uint8_t *Start = Data;
652 if (UseAllFuncs) {
653 while (Data < End) {
654 if (std::error_code EC = readFuncProfile(Data))
655 return EC;
656 }
657 assert(Data == End && "More data is read than expected");
658 } else {
659 if (Remapper) {
660 for (auto Name : FuncsToUse) {
661 Remapper->insert(Name);
662 }
663 }
664
665 if (useMD5()) {
666 for (auto Name : FuncsToUse) {
667 auto GUID = std::to_string(MD5Hash(Name));
668 auto iter = FuncOffsetTable.find(StringRef(GUID));
669 if (iter == FuncOffsetTable.end())
670 continue;
671 const uint8_t *FuncProfileAddr = Start + iter->second;
672 assert(FuncProfileAddr < End && "out of LBRProfile section");
673 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
674 return EC;
675 }
676 } else {
677 for (auto NameOffset : FuncOffsetTable) {
678 SampleContext FContext(NameOffset.first);
679 auto FuncName = FContext.getNameWithoutContext();
680 if (!FuncsToUse.count(FuncName) &&
681 (!Remapper || !Remapper->exist(FuncName)))
682 continue;
683 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
684 assert(FuncProfileAddr < End && "out of LBRProfile section");
685 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
686 return EC;
687 }
688 }
689 Data = End;
690 }
691
692 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
693 "Cannot have both context-sensitive and regular profile");
694 ProfileIsCS = (CSProfileCount > 0);
695 FunctionSamples::ProfileIsCS = ProfileIsCS;
696 return sampleprof_error::success;
697 }
698
readProfileSymbolList()699 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
700 if (!ProfSymList)
701 ProfSymList = std::make_unique<ProfileSymbolList>();
702
703 if (std::error_code EC = ProfSymList->read(Data, End - Data))
704 return EC;
705
706 Data = End;
707 return sampleprof_error::success;
708 }
709
decompressSection(const uint8_t * SecStart,const uint64_t SecSize,const uint8_t * & DecompressBuf,uint64_t & DecompressBufSize)710 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
711 const uint8_t *SecStart, const uint64_t SecSize,
712 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
713 Data = SecStart;
714 End = SecStart + SecSize;
715 auto DecompressSize = readNumber<uint64_t>();
716 if (std::error_code EC = DecompressSize.getError())
717 return EC;
718 DecompressBufSize = *DecompressSize;
719
720 auto CompressSize = readNumber<uint64_t>();
721 if (std::error_code EC = CompressSize.getError())
722 return EC;
723
724 if (!llvm::zlib::isAvailable())
725 return sampleprof_error::zlib_unavailable;
726
727 StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
728 *CompressSize);
729 char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
730 size_t UCSize = DecompressBufSize;
731 llvm::Error E =
732 zlib::uncompress(CompressedStrings, Buffer, UCSize);
733 if (E)
734 return sampleprof_error::uncompress_failed;
735 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
736 return sampleprof_error::success;
737 }
738
readImpl()739 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
740 const uint8_t *BufStart =
741 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
742
743 for (auto &Entry : SecHdrTable) {
744 // Skip empty section.
745 if (!Entry.Size)
746 continue;
747
748 // Skip sections without context when SkipFlatProf is true.
749 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
750 continue;
751
752 const uint8_t *SecStart = BufStart + Entry.Offset;
753 uint64_t SecSize = Entry.Size;
754
755 // If the section is compressed, decompress it into a buffer
756 // DecompressBuf before reading the actual data. The pointee of
757 // 'Data' will be changed to buffer hold by DecompressBuf
758 // temporarily when reading the actual data.
759 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
760 if (isCompressed) {
761 const uint8_t *DecompressBuf;
762 uint64_t DecompressBufSize;
763 if (std::error_code EC = decompressSection(
764 SecStart, SecSize, DecompressBuf, DecompressBufSize))
765 return EC;
766 SecStart = DecompressBuf;
767 SecSize = DecompressBufSize;
768 }
769
770 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
771 return EC;
772 if (Data != SecStart + SecSize)
773 return sampleprof_error::malformed;
774
775 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
776 if (isCompressed) {
777 Data = BufStart + Entry.Offset;
778 End = BufStart + Buffer->getBufferSize();
779 }
780 }
781
782 return sampleprof_error::success;
783 }
784
readImpl()785 std::error_code SampleProfileReaderCompactBinary::readImpl() {
786 std::vector<uint64_t> OffsetsToUse;
787 if (UseAllFuncs) {
788 for (auto FuncEntry : FuncOffsetTable) {
789 OffsetsToUse.push_back(FuncEntry.second);
790 }
791 }
792 else {
793 for (auto Name : FuncsToUse) {
794 auto GUID = std::to_string(MD5Hash(Name));
795 auto iter = FuncOffsetTable.find(StringRef(GUID));
796 if (iter == FuncOffsetTable.end())
797 continue;
798 OffsetsToUse.push_back(iter->second);
799 }
800 }
801
802 for (auto Offset : OffsetsToUse) {
803 const uint8_t *SavedData = Data;
804 if (std::error_code EC = readFuncProfile(
805 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
806 Offset))
807 return EC;
808 Data = SavedData;
809 }
810 return sampleprof_error::success;
811 }
812
verifySPMagic(uint64_t Magic)813 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
814 if (Magic == SPMagic())
815 return sampleprof_error::success;
816 return sampleprof_error::bad_magic;
817 }
818
verifySPMagic(uint64_t Magic)819 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
820 if (Magic == SPMagic(SPF_Ext_Binary))
821 return sampleprof_error::success;
822 return sampleprof_error::bad_magic;
823 }
824
825 std::error_code
verifySPMagic(uint64_t Magic)826 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
827 if (Magic == SPMagic(SPF_Compact_Binary))
828 return sampleprof_error::success;
829 return sampleprof_error::bad_magic;
830 }
831
readNameTable()832 std::error_code SampleProfileReaderBinary::readNameTable() {
833 auto Size = readNumber<uint32_t>();
834 if (std::error_code EC = Size.getError())
835 return EC;
836 NameTable.reserve(*Size + NameTable.size());
837 for (uint32_t I = 0; I < *Size; ++I) {
838 auto Name(readString());
839 if (std::error_code EC = Name.getError())
840 return EC;
841 NameTable.push_back(*Name);
842 }
843
844 return sampleprof_error::success;
845 }
846
readMD5NameTable()847 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
848 auto Size = readNumber<uint64_t>();
849 if (std::error_code EC = Size.getError())
850 return EC;
851 MD5StringBuf = std::make_unique<std::vector<std::string>>();
852 MD5StringBuf->reserve(*Size);
853 if (FixedLengthMD5) {
854 // Preallocate and initialize NameTable so we can check whether a name
855 // index has been read before by checking whether the element in the
856 // NameTable is empty, meanwhile readStringIndex can do the boundary
857 // check using the size of NameTable.
858 NameTable.resize(*Size + NameTable.size());
859
860 MD5NameMemStart = Data;
861 Data = Data + (*Size) * sizeof(uint64_t);
862 return sampleprof_error::success;
863 }
864 NameTable.reserve(*Size);
865 for (uint32_t I = 0; I < *Size; ++I) {
866 auto FID = readNumber<uint64_t>();
867 if (std::error_code EC = FID.getError())
868 return EC;
869 MD5StringBuf->push_back(std::to_string(*FID));
870 // NameTable is a vector of StringRef. Here it is pushing back a
871 // StringRef initialized with the last string in MD5stringBuf.
872 NameTable.push_back(MD5StringBuf->back());
873 }
874 return sampleprof_error::success;
875 }
876
readNameTableSec(bool IsMD5)877 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
878 if (IsMD5)
879 return readMD5NameTable();
880 return SampleProfileReaderBinary::readNameTable();
881 }
882
readFuncMetadata()883 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
884 if (!ProfileIsProbeBased)
885 return sampleprof_error::success;
886 while (Data < End) {
887 auto FName(readStringFromTable());
888 if (std::error_code EC = FName.getError())
889 return EC;
890
891 auto Checksum = readNumber<uint64_t>();
892 if (std::error_code EC = Checksum.getError())
893 return EC;
894
895 SampleContext FContext(*FName);
896 // No need to load metadata for profiles that are not loaded in the current
897 // module.
898 if (Profiles.count(FContext))
899 Profiles[FContext].setFunctionHash(*Checksum);
900 }
901
902 assert(Data == End && "More data is read than expected");
903 return sampleprof_error::success;
904 }
905
readNameTable()906 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
907 auto Size = readNumber<uint64_t>();
908 if (std::error_code EC = Size.getError())
909 return EC;
910 NameTable.reserve(*Size);
911 for (uint32_t I = 0; I < *Size; ++I) {
912 auto FID = readNumber<uint64_t>();
913 if (std::error_code EC = FID.getError())
914 return EC;
915 NameTable.push_back(std::to_string(*FID));
916 }
917 return sampleprof_error::success;
918 }
919
920 std::error_code
readSecHdrTableEntry(uint32_t Idx)921 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
922 SecHdrTableEntry Entry;
923 auto Type = readUnencodedNumber<uint64_t>();
924 if (std::error_code EC = Type.getError())
925 return EC;
926 Entry.Type = static_cast<SecType>(*Type);
927
928 auto Flags = readUnencodedNumber<uint64_t>();
929 if (std::error_code EC = Flags.getError())
930 return EC;
931 Entry.Flags = *Flags;
932
933 auto Offset = readUnencodedNumber<uint64_t>();
934 if (std::error_code EC = Offset.getError())
935 return EC;
936 Entry.Offset = *Offset;
937
938 auto Size = readUnencodedNumber<uint64_t>();
939 if (std::error_code EC = Size.getError())
940 return EC;
941 Entry.Size = *Size;
942
943 Entry.LayoutIndex = Idx;
944 SecHdrTable.push_back(std::move(Entry));
945 return sampleprof_error::success;
946 }
947
readSecHdrTable()948 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
949 auto EntryNum = readUnencodedNumber<uint64_t>();
950 if (std::error_code EC = EntryNum.getError())
951 return EC;
952
953 for (uint32_t i = 0; i < (*EntryNum); i++)
954 if (std::error_code EC = readSecHdrTableEntry(i))
955 return EC;
956
957 return sampleprof_error::success;
958 }
959
readHeader()960 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
961 const uint8_t *BufStart =
962 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
963 Data = BufStart;
964 End = BufStart + Buffer->getBufferSize();
965
966 if (std::error_code EC = readMagicIdent())
967 return EC;
968
969 if (std::error_code EC = readSecHdrTable())
970 return EC;
971
972 return sampleprof_error::success;
973 }
974
getSectionSize(SecType Type)975 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
976 uint64_t Size = 0;
977 for (auto &Entry : SecHdrTable) {
978 if (Entry.Type == Type)
979 Size += Entry.Size;
980 }
981 return Size;
982 }
983
getFileSize()984 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
985 // Sections in SecHdrTable is not necessarily in the same order as
986 // sections in the profile because section like FuncOffsetTable needs
987 // to be written after section LBRProfile but needs to be read before
988 // section LBRProfile, so we cannot simply use the last entry in
989 // SecHdrTable to calculate the file size.
990 uint64_t FileSize = 0;
991 for (auto &Entry : SecHdrTable) {
992 FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
993 }
994 return FileSize;
995 }
996
getSecFlagsStr(const SecHdrTableEntry & Entry)997 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
998 std::string Flags;
999 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1000 Flags.append("{compressed,");
1001 else
1002 Flags.append("{");
1003
1004 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1005 Flags.append("flat,");
1006
1007 switch (Entry.Type) {
1008 case SecNameTable:
1009 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1010 Flags.append("fixlenmd5,");
1011 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1012 Flags.append("md5,");
1013 break;
1014 case SecProfSummary:
1015 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1016 Flags.append("partial,");
1017 break;
1018 default:
1019 break;
1020 }
1021 char &last = Flags.back();
1022 if (last == ',')
1023 last = '}';
1024 else
1025 Flags.append("}");
1026 return Flags;
1027 }
1028
dumpSectionInfo(raw_ostream & OS)1029 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1030 uint64_t TotalSecsSize = 0;
1031 for (auto &Entry : SecHdrTable) {
1032 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1033 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1034 << "\n";
1035 ;
1036 TotalSecsSize += Entry.Size;
1037 }
1038 uint64_t HeaderSize = SecHdrTable.front().Offset;
1039 assert(HeaderSize + TotalSecsSize == getFileSize() &&
1040 "Size of 'header + sections' doesn't match the total size of profile");
1041
1042 OS << "Header Size: " << HeaderSize << "\n";
1043 OS << "Total Sections Size: " << TotalSecsSize << "\n";
1044 OS << "File Size: " << getFileSize() << "\n";
1045 return true;
1046 }
1047
readMagicIdent()1048 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1049 // Read and check the magic identifier.
1050 auto Magic = readNumber<uint64_t>();
1051 if (std::error_code EC = Magic.getError())
1052 return EC;
1053 else if (std::error_code EC = verifySPMagic(*Magic))
1054 return EC;
1055
1056 // Read the version number.
1057 auto Version = readNumber<uint64_t>();
1058 if (std::error_code EC = Version.getError())
1059 return EC;
1060 else if (*Version != SPVersion())
1061 return sampleprof_error::unsupported_version;
1062
1063 return sampleprof_error::success;
1064 }
1065
readHeader()1066 std::error_code SampleProfileReaderBinary::readHeader() {
1067 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1068 End = Data + Buffer->getBufferSize();
1069
1070 if (std::error_code EC = readMagicIdent())
1071 return EC;
1072
1073 if (std::error_code EC = readSummary())
1074 return EC;
1075
1076 if (std::error_code EC = readNameTable())
1077 return EC;
1078 return sampleprof_error::success;
1079 }
1080
readHeader()1081 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1082 SampleProfileReaderBinary::readHeader();
1083 if (std::error_code EC = readFuncOffsetTable())
1084 return EC;
1085 return sampleprof_error::success;
1086 }
1087
readFuncOffsetTable()1088 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1089 auto TableOffset = readUnencodedNumber<uint64_t>();
1090 if (std::error_code EC = TableOffset.getError())
1091 return EC;
1092
1093 const uint8_t *SavedData = Data;
1094 const uint8_t *TableStart =
1095 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1096 *TableOffset;
1097 Data = TableStart;
1098
1099 auto Size = readNumber<uint64_t>();
1100 if (std::error_code EC = Size.getError())
1101 return EC;
1102
1103 FuncOffsetTable.reserve(*Size);
1104 for (uint32_t I = 0; I < *Size; ++I) {
1105 auto FName(readStringFromTable());
1106 if (std::error_code EC = FName.getError())
1107 return EC;
1108
1109 auto Offset = readNumber<uint64_t>();
1110 if (std::error_code EC = Offset.getError())
1111 return EC;
1112
1113 FuncOffsetTable[*FName] = *Offset;
1114 }
1115 End = TableStart;
1116 Data = SavedData;
1117 return sampleprof_error::success;
1118 }
1119
collectFuncsFrom(const Module & M)1120 void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) {
1121 UseAllFuncs = false;
1122 FuncsToUse.clear();
1123 for (auto &F : M)
1124 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1125 }
1126
readSummaryEntry(std::vector<ProfileSummaryEntry> & Entries)1127 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1128 std::vector<ProfileSummaryEntry> &Entries) {
1129 auto Cutoff = readNumber<uint64_t>();
1130 if (std::error_code EC = Cutoff.getError())
1131 return EC;
1132
1133 auto MinBlockCount = readNumber<uint64_t>();
1134 if (std::error_code EC = MinBlockCount.getError())
1135 return EC;
1136
1137 auto NumBlocks = readNumber<uint64_t>();
1138 if (std::error_code EC = NumBlocks.getError())
1139 return EC;
1140
1141 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1142 return sampleprof_error::success;
1143 }
1144
readSummary()1145 std::error_code SampleProfileReaderBinary::readSummary() {
1146 auto TotalCount = readNumber<uint64_t>();
1147 if (std::error_code EC = TotalCount.getError())
1148 return EC;
1149
1150 auto MaxBlockCount = readNumber<uint64_t>();
1151 if (std::error_code EC = MaxBlockCount.getError())
1152 return EC;
1153
1154 auto MaxFunctionCount = readNumber<uint64_t>();
1155 if (std::error_code EC = MaxFunctionCount.getError())
1156 return EC;
1157
1158 auto NumBlocks = readNumber<uint64_t>();
1159 if (std::error_code EC = NumBlocks.getError())
1160 return EC;
1161
1162 auto NumFunctions = readNumber<uint64_t>();
1163 if (std::error_code EC = NumFunctions.getError())
1164 return EC;
1165
1166 auto NumSummaryEntries = readNumber<uint64_t>();
1167 if (std::error_code EC = NumSummaryEntries.getError())
1168 return EC;
1169
1170 std::vector<ProfileSummaryEntry> Entries;
1171 for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1172 std::error_code EC = readSummaryEntry(Entries);
1173 if (EC != sampleprof_error::success)
1174 return EC;
1175 }
1176 Summary = std::make_unique<ProfileSummary>(
1177 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1178 *MaxFunctionCount, *NumBlocks, *NumFunctions);
1179
1180 return sampleprof_error::success;
1181 }
1182
hasFormat(const MemoryBuffer & Buffer)1183 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1184 const uint8_t *Data =
1185 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1186 uint64_t Magic = decodeULEB128(Data);
1187 return Magic == SPMagic();
1188 }
1189
hasFormat(const MemoryBuffer & Buffer)1190 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1191 const uint8_t *Data =
1192 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1193 uint64_t Magic = decodeULEB128(Data);
1194 return Magic == SPMagic(SPF_Ext_Binary);
1195 }
1196
hasFormat(const MemoryBuffer & Buffer)1197 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1198 const uint8_t *Data =
1199 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1200 uint64_t Magic = decodeULEB128(Data);
1201 return Magic == SPMagic(SPF_Compact_Binary);
1202 }
1203
skipNextWord()1204 std::error_code SampleProfileReaderGCC::skipNextWord() {
1205 uint32_t dummy;
1206 if (!GcovBuffer.readInt(dummy))
1207 return sampleprof_error::truncated;
1208 return sampleprof_error::success;
1209 }
1210
readNumber()1211 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1212 if (sizeof(T) <= sizeof(uint32_t)) {
1213 uint32_t Val;
1214 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1215 return static_cast<T>(Val);
1216 } else if (sizeof(T) <= sizeof(uint64_t)) {
1217 uint64_t Val;
1218 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1219 return static_cast<T>(Val);
1220 }
1221
1222 std::error_code EC = sampleprof_error::malformed;
1223 reportError(0, EC.message());
1224 return EC;
1225 }
1226
readString()1227 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1228 StringRef Str;
1229 if (!GcovBuffer.readString(Str))
1230 return sampleprof_error::truncated;
1231 return Str;
1232 }
1233
readHeader()1234 std::error_code SampleProfileReaderGCC::readHeader() {
1235 // Read the magic identifier.
1236 if (!GcovBuffer.readGCDAFormat())
1237 return sampleprof_error::unrecognized_format;
1238
1239 // Read the version number. Note - the GCC reader does not validate this
1240 // version, but the profile creator generates v704.
1241 GCOV::GCOVVersion version;
1242 if (!GcovBuffer.readGCOVVersion(version))
1243 return sampleprof_error::unrecognized_format;
1244
1245 if (version != GCOV::V407)
1246 return sampleprof_error::unsupported_version;
1247
1248 // Skip the empty integer.
1249 if (std::error_code EC = skipNextWord())
1250 return EC;
1251
1252 return sampleprof_error::success;
1253 }
1254
readSectionTag(uint32_t Expected)1255 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1256 uint32_t Tag;
1257 if (!GcovBuffer.readInt(Tag))
1258 return sampleprof_error::truncated;
1259
1260 if (Tag != Expected)
1261 return sampleprof_error::malformed;
1262
1263 if (std::error_code EC = skipNextWord())
1264 return EC;
1265
1266 return sampleprof_error::success;
1267 }
1268
readNameTable()1269 std::error_code SampleProfileReaderGCC::readNameTable() {
1270 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1271 return EC;
1272
1273 uint32_t Size;
1274 if (!GcovBuffer.readInt(Size))
1275 return sampleprof_error::truncated;
1276
1277 for (uint32_t I = 0; I < Size; ++I) {
1278 StringRef Str;
1279 if (!GcovBuffer.readString(Str))
1280 return sampleprof_error::truncated;
1281 Names.push_back(std::string(Str));
1282 }
1283
1284 return sampleprof_error::success;
1285 }
1286
readFunctionProfiles()1287 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1288 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1289 return EC;
1290
1291 uint32_t NumFunctions;
1292 if (!GcovBuffer.readInt(NumFunctions))
1293 return sampleprof_error::truncated;
1294
1295 InlineCallStack Stack;
1296 for (uint32_t I = 0; I < NumFunctions; ++I)
1297 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1298 return EC;
1299
1300 computeSummary();
1301 return sampleprof_error::success;
1302 }
1303
readOneFunctionProfile(const InlineCallStack & InlineStack,bool Update,uint32_t Offset)1304 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1305 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1306 uint64_t HeadCount = 0;
1307 if (InlineStack.size() == 0)
1308 if (!GcovBuffer.readInt64(HeadCount))
1309 return sampleprof_error::truncated;
1310
1311 uint32_t NameIdx;
1312 if (!GcovBuffer.readInt(NameIdx))
1313 return sampleprof_error::truncated;
1314
1315 StringRef Name(Names[NameIdx]);
1316
1317 uint32_t NumPosCounts;
1318 if (!GcovBuffer.readInt(NumPosCounts))
1319 return sampleprof_error::truncated;
1320
1321 uint32_t NumCallsites;
1322 if (!GcovBuffer.readInt(NumCallsites))
1323 return sampleprof_error::truncated;
1324
1325 FunctionSamples *FProfile = nullptr;
1326 if (InlineStack.size() == 0) {
1327 // If this is a top function that we have already processed, do not
1328 // update its profile again. This happens in the presence of
1329 // function aliases. Since these aliases share the same function
1330 // body, there will be identical replicated profiles for the
1331 // original function. In this case, we simply not bother updating
1332 // the profile of the original function.
1333 FProfile = &Profiles[Name];
1334 FProfile->addHeadSamples(HeadCount);
1335 if (FProfile->getTotalSamples() > 0)
1336 Update = false;
1337 } else {
1338 // Otherwise, we are reading an inlined instance. The top of the
1339 // inline stack contains the profile of the caller. Insert this
1340 // callee in the caller's CallsiteMap.
1341 FunctionSamples *CallerProfile = InlineStack.front();
1342 uint32_t LineOffset = Offset >> 16;
1343 uint32_t Discriminator = Offset & 0xffff;
1344 FProfile = &CallerProfile->functionSamplesAt(
1345 LineLocation(LineOffset, Discriminator))[std::string(Name)];
1346 }
1347 FProfile->setName(Name);
1348
1349 for (uint32_t I = 0; I < NumPosCounts; ++I) {
1350 uint32_t Offset;
1351 if (!GcovBuffer.readInt(Offset))
1352 return sampleprof_error::truncated;
1353
1354 uint32_t NumTargets;
1355 if (!GcovBuffer.readInt(NumTargets))
1356 return sampleprof_error::truncated;
1357
1358 uint64_t Count;
1359 if (!GcovBuffer.readInt64(Count))
1360 return sampleprof_error::truncated;
1361
1362 // The line location is encoded in the offset as:
1363 // high 16 bits: line offset to the start of the function.
1364 // low 16 bits: discriminator.
1365 uint32_t LineOffset = Offset >> 16;
1366 uint32_t Discriminator = Offset & 0xffff;
1367
1368 InlineCallStack NewStack;
1369 NewStack.push_back(FProfile);
1370 llvm::append_range(NewStack, InlineStack);
1371 if (Update) {
1372 // Walk up the inline stack, adding the samples on this line to
1373 // the total sample count of the callers in the chain.
1374 for (auto CallerProfile : NewStack)
1375 CallerProfile->addTotalSamples(Count);
1376
1377 // Update the body samples for the current profile.
1378 FProfile->addBodySamples(LineOffset, Discriminator, Count);
1379 }
1380
1381 // Process the list of functions called at an indirect call site.
1382 // These are all the targets that a function pointer (or virtual
1383 // function) resolved at runtime.
1384 for (uint32_t J = 0; J < NumTargets; J++) {
1385 uint32_t HistVal;
1386 if (!GcovBuffer.readInt(HistVal))
1387 return sampleprof_error::truncated;
1388
1389 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1390 return sampleprof_error::malformed;
1391
1392 uint64_t TargetIdx;
1393 if (!GcovBuffer.readInt64(TargetIdx))
1394 return sampleprof_error::truncated;
1395 StringRef TargetName(Names[TargetIdx]);
1396
1397 uint64_t TargetCount;
1398 if (!GcovBuffer.readInt64(TargetCount))
1399 return sampleprof_error::truncated;
1400
1401 if (Update)
1402 FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1403 TargetName, TargetCount);
1404 }
1405 }
1406
1407 // Process all the inlined callers into the current function. These
1408 // are all the callsites that were inlined into this function.
1409 for (uint32_t I = 0; I < NumCallsites; I++) {
1410 // The offset is encoded as:
1411 // high 16 bits: line offset to the start of the function.
1412 // low 16 bits: discriminator.
1413 uint32_t Offset;
1414 if (!GcovBuffer.readInt(Offset))
1415 return sampleprof_error::truncated;
1416 InlineCallStack NewStack;
1417 NewStack.push_back(FProfile);
1418 llvm::append_range(NewStack, InlineStack);
1419 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1420 return EC;
1421 }
1422
1423 return sampleprof_error::success;
1424 }
1425
1426 /// Read a GCC AutoFDO profile.
1427 ///
1428 /// This format is generated by the Linux Perf conversion tool at
1429 /// https://github.com/google/autofdo.
readImpl()1430 std::error_code SampleProfileReaderGCC::readImpl() {
1431 // Read the string table.
1432 if (std::error_code EC = readNameTable())
1433 return EC;
1434
1435 // Read the source profile.
1436 if (std::error_code EC = readFunctionProfiles())
1437 return EC;
1438
1439 return sampleprof_error::success;
1440 }
1441
hasFormat(const MemoryBuffer & Buffer)1442 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1443 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1444 return Magic == "adcg*704";
1445 }
1446
applyRemapping(LLVMContext & Ctx)1447 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1448 // If the reader uses MD5 to represent string, we can't remap it because
1449 // we don't know what the original function names were.
1450 if (Reader.useMD5()) {
1451 Ctx.diagnose(DiagnosticInfoSampleProfile(
1452 Reader.getBuffer()->getBufferIdentifier(),
1453 "Profile data remapping cannot be applied to profile data "
1454 "in compact format (original mangled names are not available).",
1455 DS_Warning));
1456 return;
1457 }
1458
1459 // CSSPGO-TODO: Remapper is not yet supported.
1460 // We will need to remap the entire context string.
1461 assert(Remappings && "should be initialized while creating remapper");
1462 for (auto &Sample : Reader.getProfiles()) {
1463 DenseSet<StringRef> NamesInSample;
1464 Sample.second.findAllNames(NamesInSample);
1465 for (auto &Name : NamesInSample)
1466 if (auto Key = Remappings->insert(Name))
1467 NameMap.insert({Key, Name});
1468 }
1469
1470 RemappingApplied = true;
1471 }
1472
1473 Optional<StringRef>
lookUpNameInProfile(StringRef Fname)1474 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1475 if (auto Key = Remappings->lookup(Fname))
1476 return NameMap.lookup(Key);
1477 return None;
1478 }
1479
1480 /// Prepare a memory buffer for the contents of \p Filename.
1481 ///
1482 /// \returns an error code indicating the status of the buffer.
1483 static ErrorOr<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename)1484 setupMemoryBuffer(const Twine &Filename) {
1485 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
1486 if (std::error_code EC = BufferOrErr.getError())
1487 return EC;
1488 auto Buffer = std::move(BufferOrErr.get());
1489
1490 // Sanity check the file.
1491 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1492 return sampleprof_error::too_large;
1493
1494 return std::move(Buffer);
1495 }
1496
1497 /// Create a sample profile reader based on the format of the input file.
1498 ///
1499 /// \param Filename The file to open.
1500 ///
1501 /// \param C The LLVM context to use to emit diagnostics.
1502 ///
1503 /// \param RemapFilename The file used for profile remapping.
1504 ///
1505 /// \returns an error code indicating the status of the created reader.
1506 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const std::string Filename,LLVMContext & C,const std::string RemapFilename)1507 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1508 const std::string RemapFilename) {
1509 auto BufferOrError = setupMemoryBuffer(Filename);
1510 if (std::error_code EC = BufferOrError.getError())
1511 return EC;
1512 return create(BufferOrError.get(), C, RemapFilename);
1513 }
1514
1515 /// Create a sample profile remapper from the given input, to remap the
1516 /// function names in the given profile data.
1517 ///
1518 /// \param Filename The file to open.
1519 ///
1520 /// \param Reader The profile reader the remapper is going to be applied to.
1521 ///
1522 /// \param C The LLVM context to use to emit diagnostics.
1523 ///
1524 /// \returns an error code indicating the status of the created reader.
1525 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(const std::string Filename,SampleProfileReader & Reader,LLVMContext & C)1526 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1527 SampleProfileReader &Reader,
1528 LLVMContext &C) {
1529 auto BufferOrError = setupMemoryBuffer(Filename);
1530 if (std::error_code EC = BufferOrError.getError())
1531 return EC;
1532 return create(BufferOrError.get(), Reader, C);
1533 }
1534
1535 /// Create a sample profile remapper from the given input, to remap the
1536 /// function names in the given profile data.
1537 ///
1538 /// \param B The memory buffer to create the reader from (assumes ownership).
1539 ///
1540 /// \param C The LLVM context to use to emit diagnostics.
1541 ///
1542 /// \param Reader The profile reader the remapper is going to be applied to.
1543 ///
1544 /// \returns an error code indicating the status of the created reader.
1545 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(std::unique_ptr<MemoryBuffer> & B,SampleProfileReader & Reader,LLVMContext & C)1546 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1547 SampleProfileReader &Reader,
1548 LLVMContext &C) {
1549 auto Remappings = std::make_unique<SymbolRemappingReader>();
1550 if (Error E = Remappings->read(*B.get())) {
1551 handleAllErrors(
1552 std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1553 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1554 ParseError.getLineNum(),
1555 ParseError.getMessage()));
1556 });
1557 return sampleprof_error::malformed;
1558 }
1559
1560 return std::make_unique<SampleProfileReaderItaniumRemapper>(
1561 std::move(B), std::move(Remappings), Reader);
1562 }
1563
1564 /// Create a sample profile reader based on the format of the input data.
1565 ///
1566 /// \param B The memory buffer to create the reader from (assumes ownership).
1567 ///
1568 /// \param C The LLVM context to use to emit diagnostics.
1569 ///
1570 /// \param RemapFilename The file used for profile remapping.
1571 ///
1572 /// \returns an error code indicating the status of the created reader.
1573 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(std::unique_ptr<MemoryBuffer> & B,LLVMContext & C,const std::string RemapFilename)1574 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1575 const std::string RemapFilename) {
1576 std::unique_ptr<SampleProfileReader> Reader;
1577 if (SampleProfileReaderRawBinary::hasFormat(*B))
1578 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1579 else if (SampleProfileReaderExtBinary::hasFormat(*B))
1580 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1581 else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1582 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1583 else if (SampleProfileReaderGCC::hasFormat(*B))
1584 Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1585 else if (SampleProfileReaderText::hasFormat(*B))
1586 Reader.reset(new SampleProfileReaderText(std::move(B), C));
1587 else
1588 return sampleprof_error::unrecognized_format;
1589
1590 if (!RemapFilename.empty()) {
1591 auto ReaderOrErr =
1592 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1593 if (std::error_code EC = ReaderOrErr.getError()) {
1594 std::string Msg = "Could not create remapper: " + EC.message();
1595 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1596 return EC;
1597 }
1598 Reader->Remapper = std::move(ReaderOrErr.get());
1599 }
1600
1601 FunctionSamples::Format = Reader->getFormat();
1602 if (std::error_code EC = Reader->readHeader()) {
1603 return EC;
1604 }
1605
1606 return std::move(Reader);
1607 }
1608
1609 // For text and GCC file formats, we compute the summary after reading the
1610 // profile. Binary format has the profile summary in its header.
computeSummary()1611 void SampleProfileReader::computeSummary() {
1612 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1613 Summary = Builder.computeSummaryForProfiles(Profiles);
1614 }
1615