1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/SwapByteOrder.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <iterator>
32 #include <memory>
33 #include <utility>
34 #include <vector>
35 
36 namespace llvm {
37 
38 class InstrProfReader;
39 
40 /// A file format agnostic iterator over profiling data.
41 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
42                                                NamedInstrProfRecord> {
43   InstrProfReader *Reader = nullptr;
44   value_type Record;
45 
46   void Increment();
47 
48 public:
49   InstrProfIterator() = default;
50   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
51 
52   InstrProfIterator &operator++() { Increment(); return *this; }
53   bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
54   bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
55   value_type &operator*() { return Record; }
56   value_type *operator->() { return &Record; }
57 };
58 
59 /// Base class and interface for reading profiling data of any known instrprof
60 /// format. Provides an iterator over NamedInstrProfRecords.
61 class InstrProfReader {
62   instrprof_error LastError = instrprof_error::success;
63 
64 public:
65   InstrProfReader() = default;
66   virtual ~InstrProfReader() = default;
67 
68   /// Read the header.  Required before reading first record.
69   virtual Error readHeader() = 0;
70 
71   /// Read a single record.
72   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
73 
74   /// Iterator over profile data.
75   InstrProfIterator begin() { return InstrProfIterator(this); }
76   InstrProfIterator end() { return InstrProfIterator(); }
77 
78   virtual bool isIRLevelProfile() const = 0;
79 
80   virtual bool hasCSIRLevelProfile() const = 0;
81 
82   /// Return the PGO symtab. There are three different readers:
83   /// Raw, Text, and Indexed profile readers. The first two types
84   /// of readers are used only by llvm-profdata tool, while the indexed
85   /// profile reader is also used by llvm-cov tool and the compiler (
86   /// backend or frontend). Since creating PGO symtab can create
87   /// significant runtime and memory overhead (as it touches data
88   /// for the whole program), InstrProfSymtab for the indexed profile
89   /// reader should be created on demand and it is recommended to be
90   /// only used for dumping purpose with llvm-proftool, not with the
91   /// compiler.
92   virtual InstrProfSymtab &getSymtab() = 0;
93 
94   /// Compute the sum of counts and return in Sum.
95   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
96 
97 protected:
98   std::unique_ptr<InstrProfSymtab> Symtab;
99 
100   /// Set the current error and return same.
101   Error error(instrprof_error Err) {
102     LastError = Err;
103     if (Err == instrprof_error::success)
104       return Error::success();
105     return make_error<InstrProfError>(Err);
106   }
107 
108   Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
109 
110   /// Clear the current error and return a successful one.
111   Error success() { return error(instrprof_error::success); }
112 
113 public:
114   /// Return true if the reader has finished reading the profile data.
115   bool isEOF() { return LastError == instrprof_error::eof; }
116 
117   /// Return true if the reader encountered an error reading profiling data.
118   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
119 
120   /// Get the current error.
121   Error getError() {
122     if (hasError())
123       return make_error<InstrProfError>(LastError);
124     return Error::success();
125   }
126 
127   /// Factory method to create an appropriately typed reader for the given
128   /// instrprof file.
129   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
130 
131   static Expected<std::unique_ptr<InstrProfReader>>
132   create(std::unique_ptr<MemoryBuffer> Buffer);
133 };
134 
135 /// Reader for the simple text based instrprof format.
136 ///
137 /// This format is a simple text format that's suitable for test data. Records
138 /// are separated by one or more blank lines, and record fields are separated by
139 /// new lines.
140 ///
141 /// Each record consists of a function name, a function hash, a number of
142 /// counters, and then each counter value, in that order.
143 class TextInstrProfReader : public InstrProfReader {
144 private:
145   /// The profile data file contents.
146   std::unique_ptr<MemoryBuffer> DataBuffer;
147   /// Iterator over the profile data.
148   line_iterator Line;
149   bool IsIRLevelProfile = false;
150   bool HasCSIRLevelProfile = false;
151 
152   Error readValueProfileData(InstrProfRecord &Record);
153 
154 public:
155   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
156       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
157   TextInstrProfReader(const TextInstrProfReader &) = delete;
158   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
159 
160   /// Return true if the given buffer is in text instrprof format.
161   static bool hasFormat(const MemoryBuffer &Buffer);
162 
163   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
164 
165   bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
166 
167   /// Read the header.
168   Error readHeader() override;
169 
170   /// Read a single record.
171   Error readNextRecord(NamedInstrProfRecord &Record) override;
172 
173   InstrProfSymtab &getSymtab() override {
174     assert(Symtab.get());
175     return *Symtab.get();
176   }
177 };
178 
179 /// Reader for the raw instrprof binary format from runtime.
180 ///
181 /// This format is a raw memory dump of the instrumentation-baed profiling data
182 /// from the runtime.  It has no index.
183 ///
184 /// Templated on the unsigned type whose size matches pointers on the platform
185 /// that wrote the profile.
186 template <class IntPtrT>
187 class RawInstrProfReader : public InstrProfReader {
188 private:
189   /// The profile data file contents.
190   std::unique_ptr<MemoryBuffer> DataBuffer;
191   bool ShouldSwapBytes;
192   // The value of the version field of the raw profile data header. The lower 56
193   // bits specifies the format version and the most significant 8 bits specify
194   // the variant types of the profile.
195   uint64_t Version;
196   uint64_t CountersDelta;
197   uint64_t NamesDelta;
198   const RawInstrProf::ProfileData<IntPtrT> *Data;
199   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
200   const uint64_t *CountersStart;
201   const char *NamesStart;
202   uint64_t NamesSize;
203   // After value profile is all read, this pointer points to
204   // the header of next profile data (if exists)
205   const uint8_t *ValueDataStart;
206   uint32_t ValueKindLast;
207   uint32_t CurValueDataSize;
208 
209 public:
210   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
211       : DataBuffer(std::move(DataBuffer)) {}
212   RawInstrProfReader(const RawInstrProfReader &) = delete;
213   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
214 
215   static bool hasFormat(const MemoryBuffer &DataBuffer);
216   Error readHeader() override;
217   Error readNextRecord(NamedInstrProfRecord &Record) override;
218 
219   bool isIRLevelProfile() const override {
220     return (Version & VARIANT_MASK_IR_PROF) != 0;
221   }
222 
223   bool hasCSIRLevelProfile() const override {
224     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
225   }
226 
227   InstrProfSymtab &getSymtab() override {
228     assert(Symtab.get());
229     return *Symtab.get();
230   }
231 
232 private:
233   Error createSymtab(InstrProfSymtab &Symtab);
234   Error readNextHeader(const char *CurrentPos);
235   Error readHeader(const RawInstrProf::Header &Header);
236 
237   template <class IntT> IntT swap(IntT Int) const {
238     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
239   }
240 
241   support::endianness getDataEndianness() const {
242     support::endianness HostEndian = getHostEndianness();
243     if (!ShouldSwapBytes)
244       return HostEndian;
245     if (HostEndian == support::little)
246       return support::big;
247     else
248       return support::little;
249   }
250 
251   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
252     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
253   }
254 
255   Error readName(NamedInstrProfRecord &Record);
256   Error readFuncHash(NamedInstrProfRecord &Record);
257   Error readRawCounts(InstrProfRecord &Record);
258   Error readValueProfilingData(InstrProfRecord &Record);
259   bool atEnd() const { return Data == DataEnd; }
260 
261   void advanceData() {
262     Data++;
263     ValueDataStart += CurValueDataSize;
264   }
265 
266   const char *getNextHeaderPos() const {
267       assert(atEnd());
268       return (const char *)ValueDataStart;
269   }
270 
271   /// Get the offset of \p CounterPtr from the start of the counters section of
272   /// the profile. The offset has units of "number of counters", i.e. increasing
273   /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
274   ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
275     return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
276   }
277 
278   const uint64_t *getCounter(ptrdiff_t Offset) const {
279     return CountersStart + Offset;
280   }
281 
282   StringRef getName(uint64_t NameRef) const {
283     return Symtab->getFuncName(swap(NameRef));
284   }
285 };
286 
287 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
288 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
289 
290 namespace IndexedInstrProf {
291 
292 enum class HashT : uint32_t;
293 
294 } // end namespace IndexedInstrProf
295 
296 /// Trait for lookups into the on-disk hash table for the binary instrprof
297 /// format.
298 class InstrProfLookupTrait {
299   std::vector<NamedInstrProfRecord> DataBuffer;
300   IndexedInstrProf::HashT HashType;
301   unsigned FormatVersion;
302   // Endianness of the input value profile data.
303   // It should be LE by default, but can be changed
304   // for testing purpose.
305   support::endianness ValueProfDataEndianness = support::little;
306 
307 public:
308   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
309       : HashType(HashType), FormatVersion(FormatVersion) {}
310 
311   using data_type = ArrayRef<NamedInstrProfRecord>;
312 
313   using internal_key_type = StringRef;
314   using external_key_type = StringRef;
315   using hash_value_type = uint64_t;
316   using offset_type = uint64_t;
317 
318   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
319   static StringRef GetInternalKey(StringRef K) { return K; }
320   static StringRef GetExternalKey(StringRef K) { return K; }
321 
322   hash_value_type ComputeHash(StringRef K);
323 
324   static std::pair<offset_type, offset_type>
325   ReadKeyDataLength(const unsigned char *&D) {
326     using namespace support;
327 
328     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
329     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
330     return std::make_pair(KeyLen, DataLen);
331   }
332 
333   StringRef ReadKey(const unsigned char *D, offset_type N) {
334     return StringRef((const char *)D, N);
335   }
336 
337   bool readValueProfilingData(const unsigned char *&D,
338                               const unsigned char *const End);
339   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
340 
341   // Used for testing purpose only.
342   void setValueProfDataEndianness(support::endianness Endianness) {
343     ValueProfDataEndianness = Endianness;
344   }
345 };
346 
347 struct InstrProfReaderIndexBase {
348   virtual ~InstrProfReaderIndexBase() = default;
349 
350   // Read all the profile records with the same key pointed to the current
351   // iterator.
352   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
353 
354   // Read all the profile records with the key equal to FuncName
355   virtual Error getRecords(StringRef FuncName,
356                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
357   virtual void advanceToNextKey() = 0;
358   virtual bool atEnd() const = 0;
359   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
360   virtual uint64_t getVersion() const = 0;
361   virtual bool isIRLevelProfile() const = 0;
362   virtual bool hasCSIRLevelProfile() const = 0;
363   virtual Error populateSymtab(InstrProfSymtab &) = 0;
364 };
365 
366 using OnDiskHashTableImplV3 =
367     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
368 
369 template <typename HashTableImpl>
370 class InstrProfReaderItaniumRemapper;
371 
372 template <typename HashTableImpl>
373 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
374 private:
375   std::unique_ptr<HashTableImpl> HashTable;
376   typename HashTableImpl::data_iterator RecordIterator;
377   uint64_t FormatVersion;
378 
379   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
380 
381 public:
382   InstrProfReaderIndex(const unsigned char *Buckets,
383                        const unsigned char *const Payload,
384                        const unsigned char *const Base,
385                        IndexedInstrProf::HashT HashType, uint64_t Version);
386   ~InstrProfReaderIndex() override = default;
387 
388   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
389   Error getRecords(StringRef FuncName,
390                    ArrayRef<NamedInstrProfRecord> &Data) override;
391   void advanceToNextKey() override { RecordIterator++; }
392 
393   bool atEnd() const override {
394     return RecordIterator == HashTable->data_end();
395   }
396 
397   void setValueProfDataEndianness(support::endianness Endianness) override {
398     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
399   }
400 
401   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
402 
403   bool isIRLevelProfile() const override {
404     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
405   }
406 
407   bool hasCSIRLevelProfile() const override {
408     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
409   }
410 
411   Error populateSymtab(InstrProfSymtab &Symtab) override {
412     return Symtab.create(HashTable->keys());
413   }
414 };
415 
416 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
417 class InstrProfReaderRemapper {
418 public:
419   virtual ~InstrProfReaderRemapper() {}
420   virtual Error populateRemappings() { return Error::success(); }
421   virtual Error getRecords(StringRef FuncName,
422                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
423 };
424 
425 /// Reader for the indexed binary instrprof format.
426 class IndexedInstrProfReader : public InstrProfReader {
427 private:
428   /// The profile data file contents.
429   std::unique_ptr<MemoryBuffer> DataBuffer;
430   /// The profile remapping file contents.
431   std::unique_ptr<MemoryBuffer> RemappingBuffer;
432   /// The index into the profile data.
433   std::unique_ptr<InstrProfReaderIndexBase> Index;
434   /// The profile remapping file contents.
435   std::unique_ptr<InstrProfReaderRemapper> Remapper;
436   /// Profile summary data.
437   std::unique_ptr<ProfileSummary> Summary;
438   /// Context sensitive profile summary data.
439   std::unique_ptr<ProfileSummary> CS_Summary;
440   // Index to the current record in the record array.
441   unsigned RecordIndex;
442 
443   // Read the profile summary. Return a pointer pointing to one byte past the
444   // end of the summary data if it exists or the input \c Cur.
445   // \c UseCS indicates whether to use the context-sensitive profile summary.
446   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
447                                    const unsigned char *Cur, bool UseCS);
448 
449 public:
450   IndexedInstrProfReader(
451       std::unique_ptr<MemoryBuffer> DataBuffer,
452       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
453       : DataBuffer(std::move(DataBuffer)),
454         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
455   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
456   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
457 
458   /// Return the profile version.
459   uint64_t getVersion() const { return Index->getVersion(); }
460   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
461   bool hasCSIRLevelProfile() const override {
462     return Index->hasCSIRLevelProfile();
463   }
464 
465   /// Return true if the given buffer is in an indexed instrprof format.
466   static bool hasFormat(const MemoryBuffer &DataBuffer);
467 
468   /// Read the file header.
469   Error readHeader() override;
470   /// Read a single record.
471   Error readNextRecord(NamedInstrProfRecord &Record) override;
472 
473   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
474   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
475                                                uint64_t FuncHash);
476 
477   /// Fill Counts with the profile data for the given function name.
478   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
479                           std::vector<uint64_t> &Counts);
480 
481   /// Return the maximum of all known function counts.
482   /// \c UseCS indicates whether to use the context-sensitive count.
483   uint64_t getMaximumFunctionCount(bool UseCS) {
484     if (UseCS) {
485       assert(CS_Summary && "No context sensitive profile summary");
486       return CS_Summary->getMaxFunctionCount();
487     } else {
488       assert(Summary && "No profile summary");
489       return Summary->getMaxFunctionCount();
490     }
491   }
492 
493   /// Factory method to create an indexed reader.
494   static Expected<std::unique_ptr<IndexedInstrProfReader>>
495   create(const Twine &Path, const Twine &RemappingPath = "");
496 
497   static Expected<std::unique_ptr<IndexedInstrProfReader>>
498   create(std::unique_ptr<MemoryBuffer> Buffer,
499          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
500 
501   // Used for testing purpose only.
502   void setValueProfDataEndianness(support::endianness Endianness) {
503     Index->setValueProfDataEndianness(Endianness);
504   }
505 
506   // See description in the base class. This interface is designed
507   // to be used by llvm-profdata (for dumping). Avoid using this when
508   // the client is the compiler.
509   InstrProfSymtab &getSymtab() override;
510 
511   /// Return the profile summary.
512   /// \c UseCS indicates whether to use the context-sensitive summary.
513   ProfileSummary &getSummary(bool UseCS) {
514     if (UseCS) {
515       assert(CS_Summary && "No context sensitive summary");
516       return *(CS_Summary.get());
517     } else {
518       assert(Summary && "No profile summary");
519       return *(Summary.get());
520     }
521   }
522 };
523 
524 } // end namespace llvm
525 
526 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
527