1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/SwapByteOrder.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <iterator>
32 #include <memory>
33 #include <utility>
34 #include <vector>
35 
36 namespace llvm {
37 
38 class InstrProfReader;
39 
40 /// A file format agnostic iterator over profiling data.
41 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
42                                                NamedInstrProfRecord> {
43   InstrProfReader *Reader = nullptr;
44   value_type Record;
45 
46   void Increment();
47 
48 public:
49   InstrProfIterator() = default;
50   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
51 
52   InstrProfIterator &operator++() { Increment(); return *this; }
53   bool operator==(const InstrProfIterator &RHS) const {
54     return Reader == RHS.Reader;
55   }
56   bool operator!=(const InstrProfIterator &RHS) const {
57     return Reader != RHS.Reader;
58   }
59   value_type &operator*() { return Record; }
60   value_type *operator->() { return &Record; }
61 };
62 
63 /// Base class and interface for reading profiling data of any known instrprof
64 /// format. Provides an iterator over NamedInstrProfRecords.
65 class InstrProfReader {
66   instrprof_error LastError = instrprof_error::success;
67 
68 public:
69   InstrProfReader() = default;
70   virtual ~InstrProfReader() = default;
71 
72   /// Read the header.  Required before reading first record.
73   virtual Error readHeader() = 0;
74 
75   /// Read a single record.
76   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
77 
78   /// Iterator over profile data.
79   InstrProfIterator begin() { return InstrProfIterator(this); }
80   InstrProfIterator end() { return InstrProfIterator(); }
81 
82   virtual bool isIRLevelProfile() const = 0;
83 
84   virtual bool hasCSIRLevelProfile() const = 0;
85 
86   virtual bool instrEntryBBEnabled() const = 0;
87 
88   /// Return the PGO symtab. There are three different readers:
89   /// Raw, Text, and Indexed profile readers. The first two types
90   /// of readers are used only by llvm-profdata tool, while the indexed
91   /// profile reader is also used by llvm-cov tool and the compiler (
92   /// backend or frontend). Since creating PGO symtab can create
93   /// significant runtime and memory overhead (as it touches data
94   /// for the whole program), InstrProfSymtab for the indexed profile
95   /// reader should be created on demand and it is recommended to be
96   /// only used for dumping purpose with llvm-proftool, not with the
97   /// compiler.
98   virtual InstrProfSymtab &getSymtab() = 0;
99 
100   /// Compute the sum of counts and return in Sum.
101   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
102 
103 protected:
104   std::unique_ptr<InstrProfSymtab> Symtab;
105 
106   /// Set the current error and return same.
107   Error error(instrprof_error Err) {
108     LastError = Err;
109     if (Err == instrprof_error::success)
110       return Error::success();
111     return make_error<InstrProfError>(Err);
112   }
113 
114   Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
115 
116   /// Clear the current error and return a successful one.
117   Error success() { return error(instrprof_error::success); }
118 
119 public:
120   /// Return true if the reader has finished reading the profile data.
121   bool isEOF() { return LastError == instrprof_error::eof; }
122 
123   /// Return true if the reader encountered an error reading profiling data.
124   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
125 
126   /// Get the current error.
127   Error getError() {
128     if (hasError())
129       return make_error<InstrProfError>(LastError);
130     return Error::success();
131   }
132 
133   /// Factory method to create an appropriately typed reader for the given
134   /// instrprof file.
135   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
136 
137   static Expected<std::unique_ptr<InstrProfReader>>
138   create(std::unique_ptr<MemoryBuffer> Buffer);
139 };
140 
141 /// Reader for the simple text based instrprof format.
142 ///
143 /// This format is a simple text format that's suitable for test data. Records
144 /// are separated by one or more blank lines, and record fields are separated by
145 /// new lines.
146 ///
147 /// Each record consists of a function name, a function hash, a number of
148 /// counters, and then each counter value, in that order.
149 class TextInstrProfReader : public InstrProfReader {
150 private:
151   /// The profile data file contents.
152   std::unique_ptr<MemoryBuffer> DataBuffer;
153   /// Iterator over the profile data.
154   line_iterator Line;
155   bool IsIRLevelProfile = false;
156   bool HasCSIRLevelProfile = false;
157   bool InstrEntryBBEnabled = false;
158 
159   Error readValueProfileData(InstrProfRecord &Record);
160 
161 public:
162   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
163       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
164   TextInstrProfReader(const TextInstrProfReader &) = delete;
165   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
166 
167   /// Return true if the given buffer is in text instrprof format.
168   static bool hasFormat(const MemoryBuffer &Buffer);
169 
170   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
171 
172   bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
173 
174   bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
175 
176   /// Read the header.
177   Error readHeader() override;
178 
179   /// Read a single record.
180   Error readNextRecord(NamedInstrProfRecord &Record) override;
181 
182   InstrProfSymtab &getSymtab() override {
183     assert(Symtab.get());
184     return *Symtab.get();
185   }
186 };
187 
188 /// Reader for the raw instrprof binary format from runtime.
189 ///
190 /// This format is a raw memory dump of the instrumentation-baed profiling data
191 /// from the runtime.  It has no index.
192 ///
193 /// Templated on the unsigned type whose size matches pointers on the platform
194 /// that wrote the profile.
195 template <class IntPtrT>
196 class RawInstrProfReader : public InstrProfReader {
197 private:
198   /// The profile data file contents.
199   std::unique_ptr<MemoryBuffer> DataBuffer;
200   bool ShouldSwapBytes;
201   // The value of the version field of the raw profile data header. The lower 56
202   // bits specifies the format version and the most significant 8 bits specify
203   // the variant types of the profile.
204   uint64_t Version;
205   uint64_t CountersDelta;
206   uint64_t NamesDelta;
207   const RawInstrProf::ProfileData<IntPtrT> *Data;
208   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
209   const uint64_t *CountersStart;
210   const char *NamesStart;
211   uint64_t NamesSize;
212   // After value profile is all read, this pointer points to
213   // the header of next profile data (if exists)
214   const uint8_t *ValueDataStart;
215   uint32_t ValueKindLast;
216   uint32_t CurValueDataSize;
217 
218 public:
219   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
220       : DataBuffer(std::move(DataBuffer)) {}
221   RawInstrProfReader(const RawInstrProfReader &) = delete;
222   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
223 
224   static bool hasFormat(const MemoryBuffer &DataBuffer);
225   Error readHeader() override;
226   Error readNextRecord(NamedInstrProfRecord &Record) override;
227 
228   bool isIRLevelProfile() const override {
229     return (Version & VARIANT_MASK_IR_PROF) != 0;
230   }
231 
232   bool hasCSIRLevelProfile() const override {
233     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
234   }
235 
236   bool instrEntryBBEnabled() const override {
237     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
238   }
239 
240   InstrProfSymtab &getSymtab() override {
241     assert(Symtab.get());
242     return *Symtab.get();
243   }
244 
245 private:
246   Error createSymtab(InstrProfSymtab &Symtab);
247   Error readNextHeader(const char *CurrentPos);
248   Error readHeader(const RawInstrProf::Header &Header);
249 
250   template <class IntT> IntT swap(IntT Int) const {
251     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
252   }
253 
254   support::endianness getDataEndianness() const {
255     support::endianness HostEndian = getHostEndianness();
256     if (!ShouldSwapBytes)
257       return HostEndian;
258     if (HostEndian == support::little)
259       return support::big;
260     else
261       return support::little;
262   }
263 
264   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
265     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
266   }
267 
268   Error readName(NamedInstrProfRecord &Record);
269   Error readFuncHash(NamedInstrProfRecord &Record);
270   Error readRawCounts(InstrProfRecord &Record);
271   Error readValueProfilingData(InstrProfRecord &Record);
272   bool atEnd() const { return Data == DataEnd; }
273 
274   void advanceData() {
275     Data++;
276     ValueDataStart += CurValueDataSize;
277   }
278 
279   const char *getNextHeaderPos() const {
280       assert(atEnd());
281       return (const char *)ValueDataStart;
282   }
283 
284   /// Get the offset of \p CounterPtr from the start of the counters section of
285   /// the profile. The offset has units of "number of counters", i.e. increasing
286   /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
287   ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
288     return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
289   }
290 
291   const uint64_t *getCounter(ptrdiff_t Offset) const {
292     return CountersStart + Offset;
293   }
294 
295   StringRef getName(uint64_t NameRef) const {
296     return Symtab->getFuncName(swap(NameRef));
297   }
298 };
299 
300 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
301 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
302 
303 namespace IndexedInstrProf {
304 
305 enum class HashT : uint32_t;
306 
307 } // end namespace IndexedInstrProf
308 
309 /// Trait for lookups into the on-disk hash table for the binary instrprof
310 /// format.
311 class InstrProfLookupTrait {
312   std::vector<NamedInstrProfRecord> DataBuffer;
313   IndexedInstrProf::HashT HashType;
314   unsigned FormatVersion;
315   // Endianness of the input value profile data.
316   // It should be LE by default, but can be changed
317   // for testing purpose.
318   support::endianness ValueProfDataEndianness = support::little;
319 
320 public:
321   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
322       : HashType(HashType), FormatVersion(FormatVersion) {}
323 
324   using data_type = ArrayRef<NamedInstrProfRecord>;
325 
326   using internal_key_type = StringRef;
327   using external_key_type = StringRef;
328   using hash_value_type = uint64_t;
329   using offset_type = uint64_t;
330 
331   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
332   static StringRef GetInternalKey(StringRef K) { return K; }
333   static StringRef GetExternalKey(StringRef K) { return K; }
334 
335   hash_value_type ComputeHash(StringRef K);
336 
337   static std::pair<offset_type, offset_type>
338   ReadKeyDataLength(const unsigned char *&D) {
339     using namespace support;
340 
341     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
342     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
343     return std::make_pair(KeyLen, DataLen);
344   }
345 
346   StringRef ReadKey(const unsigned char *D, offset_type N) {
347     return StringRef((const char *)D, N);
348   }
349 
350   bool readValueProfilingData(const unsigned char *&D,
351                               const unsigned char *const End);
352   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
353 
354   // Used for testing purpose only.
355   void setValueProfDataEndianness(support::endianness Endianness) {
356     ValueProfDataEndianness = Endianness;
357   }
358 };
359 
360 struct InstrProfReaderIndexBase {
361   virtual ~InstrProfReaderIndexBase() = default;
362 
363   // Read all the profile records with the same key pointed to the current
364   // iterator.
365   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
366 
367   // Read all the profile records with the key equal to FuncName
368   virtual Error getRecords(StringRef FuncName,
369                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
370   virtual void advanceToNextKey() = 0;
371   virtual bool atEnd() const = 0;
372   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
373   virtual uint64_t getVersion() const = 0;
374   virtual bool isIRLevelProfile() const = 0;
375   virtual bool hasCSIRLevelProfile() const = 0;
376   virtual bool instrEntryBBEnabled() const = 0;
377   virtual Error populateSymtab(InstrProfSymtab &) = 0;
378 };
379 
380 using OnDiskHashTableImplV3 =
381     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
382 
383 template <typename HashTableImpl>
384 class InstrProfReaderItaniumRemapper;
385 
386 template <typename HashTableImpl>
387 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
388 private:
389   std::unique_ptr<HashTableImpl> HashTable;
390   typename HashTableImpl::data_iterator RecordIterator;
391   uint64_t FormatVersion;
392 
393   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
394 
395 public:
396   InstrProfReaderIndex(const unsigned char *Buckets,
397                        const unsigned char *const Payload,
398                        const unsigned char *const Base,
399                        IndexedInstrProf::HashT HashType, uint64_t Version);
400   ~InstrProfReaderIndex() override = default;
401 
402   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
403   Error getRecords(StringRef FuncName,
404                    ArrayRef<NamedInstrProfRecord> &Data) override;
405   void advanceToNextKey() override { RecordIterator++; }
406 
407   bool atEnd() const override {
408     return RecordIterator == HashTable->data_end();
409   }
410 
411   void setValueProfDataEndianness(support::endianness Endianness) override {
412     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
413   }
414 
415   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
416 
417   bool isIRLevelProfile() const override {
418     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
419   }
420 
421   bool hasCSIRLevelProfile() const override {
422     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
423   }
424 
425   bool instrEntryBBEnabled() const override {
426     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
427   }
428 
429   Error populateSymtab(InstrProfSymtab &Symtab) override {
430     return Symtab.create(HashTable->keys());
431   }
432 };
433 
434 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
435 class InstrProfReaderRemapper {
436 public:
437   virtual ~InstrProfReaderRemapper() {}
438   virtual Error populateRemappings() { return Error::success(); }
439   virtual Error getRecords(StringRef FuncName,
440                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
441 };
442 
443 /// Reader for the indexed binary instrprof format.
444 class IndexedInstrProfReader : public InstrProfReader {
445 private:
446   /// The profile data file contents.
447   std::unique_ptr<MemoryBuffer> DataBuffer;
448   /// The profile remapping file contents.
449   std::unique_ptr<MemoryBuffer> RemappingBuffer;
450   /// The index into the profile data.
451   std::unique_ptr<InstrProfReaderIndexBase> Index;
452   /// The profile remapping file contents.
453   std::unique_ptr<InstrProfReaderRemapper> Remapper;
454   /// Profile summary data.
455   std::unique_ptr<ProfileSummary> Summary;
456   /// Context sensitive profile summary data.
457   std::unique_ptr<ProfileSummary> CS_Summary;
458   // Index to the current record in the record array.
459   unsigned RecordIndex;
460 
461   // Read the profile summary. Return a pointer pointing to one byte past the
462   // end of the summary data if it exists or the input \c Cur.
463   // \c UseCS indicates whether to use the context-sensitive profile summary.
464   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
465                                    const unsigned char *Cur, bool UseCS);
466 
467 public:
468   IndexedInstrProfReader(
469       std::unique_ptr<MemoryBuffer> DataBuffer,
470       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
471       : DataBuffer(std::move(DataBuffer)),
472         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
473   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
474   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
475 
476   /// Return the profile version.
477   uint64_t getVersion() const { return Index->getVersion(); }
478   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
479   bool hasCSIRLevelProfile() const override {
480     return Index->hasCSIRLevelProfile();
481   }
482 
483   bool instrEntryBBEnabled() const override {
484     return Index->instrEntryBBEnabled();
485   }
486 
487   /// Return true if the given buffer is in an indexed instrprof format.
488   static bool hasFormat(const MemoryBuffer &DataBuffer);
489 
490   /// Read the file header.
491   Error readHeader() override;
492   /// Read a single record.
493   Error readNextRecord(NamedInstrProfRecord &Record) override;
494 
495   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
496   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
497                                                uint64_t FuncHash);
498 
499   /// Fill Counts with the profile data for the given function name.
500   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
501                           std::vector<uint64_t> &Counts);
502 
503   /// Return the maximum of all known function counts.
504   /// \c UseCS indicates whether to use the context-sensitive count.
505   uint64_t getMaximumFunctionCount(bool UseCS) {
506     if (UseCS) {
507       assert(CS_Summary && "No context sensitive profile summary");
508       return CS_Summary->getMaxFunctionCount();
509     } else {
510       assert(Summary && "No profile summary");
511       return Summary->getMaxFunctionCount();
512     }
513   }
514 
515   /// Factory method to create an indexed reader.
516   static Expected<std::unique_ptr<IndexedInstrProfReader>>
517   create(const Twine &Path, const Twine &RemappingPath = "");
518 
519   static Expected<std::unique_ptr<IndexedInstrProfReader>>
520   create(std::unique_ptr<MemoryBuffer> Buffer,
521          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
522 
523   // Used for testing purpose only.
524   void setValueProfDataEndianness(support::endianness Endianness) {
525     Index->setValueProfDataEndianness(Endianness);
526   }
527 
528   // See description in the base class. This interface is designed
529   // to be used by llvm-profdata (for dumping). Avoid using this when
530   // the client is the compiler.
531   InstrProfSymtab &getSymtab() override;
532 
533   /// Return the profile summary.
534   /// \c UseCS indicates whether to use the context-sensitive summary.
535   ProfileSummary &getSummary(bool UseCS) {
536     if (UseCS) {
537       assert(CS_Summary && "No context sensitive summary");
538       return *(CS_Summary.get());
539     } else {
540       assert(Summary && "No profile summary");
541       return *(Summary.get());
542     }
543   }
544 };
545 
546 } // end namespace llvm
547 
548 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
549