1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/SwapByteOrder.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <iterator>
32 #include <memory>
33 #include <utility>
34 #include <vector>
35 
36 namespace llvm {
37 
/// Forward declaration: InstrProfIterator below stores a pointer to its reader
/// before the reader class itself is defined further down in this header.
class InstrProfReader;
39 
40 /// A file format agnostic iterator over profiling data.
41 class InstrProfIterator {
42 public:
43   using iterator_category = std::input_iterator_tag;
44   using value_type = NamedInstrProfRecord;
45   using difference_type = std::ptrdiff_t;
46   using pointer = value_type *;
47   using reference = value_type &;
48 
49 private:
50   InstrProfReader *Reader = nullptr;
51   value_type Record;
52 
53   void Increment();
54 
55 public:
56   InstrProfIterator() = default;
57   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
58 
59   InstrProfIterator &operator++() { Increment(); return *this; }
60   bool operator==(const InstrProfIterator &RHS) const {
61     return Reader == RHS.Reader;
62   }
63   bool operator!=(const InstrProfIterator &RHS) const {
64     return Reader != RHS.Reader;
65   }
66   value_type &operator*() { return Record; }
67   value_type *operator->() { return &Record; }
68 };
69 
70 /// Base class and interface for reading profiling data of any known instrprof
71 /// format. Provides an iterator over NamedInstrProfRecords.
72 class InstrProfReader {
73   instrprof_error LastError = instrprof_error::success;
74 
75 public:
76   InstrProfReader() = default;
77   virtual ~InstrProfReader() = default;
78 
79   /// Read the header.  Required before reading first record.
80   virtual Error readHeader() = 0;
81 
82   /// Read a single record.
83   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
84 
85   /// Print binary ids on stream OS.
86   virtual Error printBinaryIds(raw_ostream &OS) { return success(); };
87 
88   /// Iterator over profile data.
89   InstrProfIterator begin() { return InstrProfIterator(this); }
90   InstrProfIterator end() { return InstrProfIterator(); }
91 
92   virtual bool isIRLevelProfile() const = 0;
93 
94   virtual bool hasCSIRLevelProfile() const = 0;
95 
96   virtual bool instrEntryBBEnabled() const = 0;
97 
98   /// Return the PGO symtab. There are three different readers:
99   /// Raw, Text, and Indexed profile readers. The first two types
100   /// of readers are used only by llvm-profdata tool, while the indexed
101   /// profile reader is also used by llvm-cov tool and the compiler (
102   /// backend or frontend). Since creating PGO symtab can create
103   /// significant runtime and memory overhead (as it touches data
104   /// for the whole program), InstrProfSymtab for the indexed profile
105   /// reader should be created on demand and it is recommended to be
106   /// only used for dumping purpose with llvm-proftool, not with the
107   /// compiler.
108   virtual InstrProfSymtab &getSymtab() = 0;
109 
110   /// Compute the sum of counts and return in Sum.
111   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
112 
113 protected:
114   std::unique_ptr<InstrProfSymtab> Symtab;
115 
116   /// Set the current error and return same.
117   Error error(instrprof_error Err) {
118     LastError = Err;
119     if (Err == instrprof_error::success)
120       return Error::success();
121     return make_error<InstrProfError>(Err);
122   }
123 
124   Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
125 
126   /// Clear the current error and return a successful one.
127   Error success() { return error(instrprof_error::success); }
128 
129 public:
130   /// Return true if the reader has finished reading the profile data.
131   bool isEOF() { return LastError == instrprof_error::eof; }
132 
133   /// Return true if the reader encountered an error reading profiling data.
134   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
135 
136   /// Get the current error.
137   Error getError() {
138     if (hasError())
139       return make_error<InstrProfError>(LastError);
140     return Error::success();
141   }
142 
143   /// Factory method to create an appropriately typed reader for the given
144   /// instrprof file.
145   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
146 
147   static Expected<std::unique_ptr<InstrProfReader>>
148   create(std::unique_ptr<MemoryBuffer> Buffer);
149 };
150 
151 /// Reader for the simple text based instrprof format.
152 ///
153 /// This format is a simple text format that's suitable for test data. Records
154 /// are separated by one or more blank lines, and record fields are separated by
155 /// new lines.
156 ///
157 /// Each record consists of a function name, a function hash, a number of
158 /// counters, and then each counter value, in that order.
159 class TextInstrProfReader : public InstrProfReader {
160 private:
161   /// The profile data file contents.
162   std::unique_ptr<MemoryBuffer> DataBuffer;
163   /// Iterator over the profile data.
164   line_iterator Line;
165   bool IsIRLevelProfile = false;
166   bool HasCSIRLevelProfile = false;
167   bool InstrEntryBBEnabled = false;
168 
169   Error readValueProfileData(InstrProfRecord &Record);
170 
171 public:
172   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
173       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
174   TextInstrProfReader(const TextInstrProfReader &) = delete;
175   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
176 
177   /// Return true if the given buffer is in text instrprof format.
178   static bool hasFormat(const MemoryBuffer &Buffer);
179 
180   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
181 
182   bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
183 
184   bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
185 
186   /// Read the header.
187   Error readHeader() override;
188 
189   /// Read a single record.
190   Error readNextRecord(NamedInstrProfRecord &Record) override;
191 
192   InstrProfSymtab &getSymtab() override {
193     assert(Symtab.get());
194     return *Symtab.get();
195   }
196 };
197 
198 /// Reader for the raw instrprof binary format from runtime.
199 ///
200 /// This format is a raw memory dump of the instrumentation-baed profiling data
201 /// from the runtime.  It has no index.
202 ///
203 /// Templated on the unsigned type whose size matches pointers on the platform
204 /// that wrote the profile.
205 template <class IntPtrT>
206 class RawInstrProfReader : public InstrProfReader {
207 private:
208   /// The profile data file contents.
209   std::unique_ptr<MemoryBuffer> DataBuffer;
210   bool ShouldSwapBytes;
211   // The value of the version field of the raw profile data header. The lower 56
212   // bits specifies the format version and the most significant 8 bits specify
213   // the variant types of the profile.
214   uint64_t Version;
215   uint64_t CountersDelta;
216   uint64_t NamesDelta;
217   const RawInstrProf::ProfileData<IntPtrT> *Data;
218   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
219   const uint64_t *CountersStart;
220   const char *NamesStart;
221   uint64_t NamesSize;
222   // After value profile is all read, this pointer points to
223   // the header of next profile data (if exists)
224   const uint8_t *ValueDataStart;
225   uint32_t ValueKindLast;
226   uint32_t CurValueDataSize;
227 
228   uint64_t BinaryIdsSize;
229   const uint8_t *BinaryIdsStart;
230 
231 public:
232   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
233       : DataBuffer(std::move(DataBuffer)) {}
234   RawInstrProfReader(const RawInstrProfReader &) = delete;
235   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
236 
237   static bool hasFormat(const MemoryBuffer &DataBuffer);
238   Error readHeader() override;
239   Error readNextRecord(NamedInstrProfRecord &Record) override;
240   Error printBinaryIds(raw_ostream &OS) override;
241 
242   bool isIRLevelProfile() const override {
243     return (Version & VARIANT_MASK_IR_PROF) != 0;
244   }
245 
246   bool hasCSIRLevelProfile() const override {
247     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
248   }
249 
250   bool instrEntryBBEnabled() const override {
251     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
252   }
253 
254   InstrProfSymtab &getSymtab() override {
255     assert(Symtab.get());
256     return *Symtab.get();
257   }
258 
259 private:
260   Error createSymtab(InstrProfSymtab &Symtab);
261   Error readNextHeader(const char *CurrentPos);
262   Error readHeader(const RawInstrProf::Header &Header);
263 
264   template <class IntT> IntT swap(IntT Int) const {
265     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
266   }
267 
268   support::endianness getDataEndianness() const {
269     support::endianness HostEndian = getHostEndianness();
270     if (!ShouldSwapBytes)
271       return HostEndian;
272     if (HostEndian == support::little)
273       return support::big;
274     else
275       return support::little;
276   }
277 
278   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
279     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
280   }
281 
282   Error readName(NamedInstrProfRecord &Record);
283   Error readFuncHash(NamedInstrProfRecord &Record);
284   Error readRawCounts(InstrProfRecord &Record);
285   Error readValueProfilingData(InstrProfRecord &Record);
286   bool atEnd() const { return Data == DataEnd; }
287 
288   void advanceData() {
289     Data++;
290     ValueDataStart += CurValueDataSize;
291   }
292 
293   const char *getNextHeaderPos() const {
294       assert(atEnd());
295       return (const char *)ValueDataStart;
296   }
297 
298   /// Get the offset of \p CounterPtr from the start of the counters section of
299   /// the profile. The offset has units of "number of counters", i.e. increasing
300   /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
301   ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
302     return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
303   }
304 
305   const uint64_t *getCounter(ptrdiff_t Offset) const {
306     return CountersStart + Offset;
307   }
308 
309   StringRef getName(uint64_t NameRef) const {
310     return Symtab->getFuncName(swap(NameRef));
311   }
312 };
313 
/// Raw profile readers instantiated for 32-bit and 64-bit unsigned
/// pointer-sized integers, respectively.
using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
316 
namespace IndexedInstrProf {

/// Opaque declaration of the hash-type enumeration (underlying type uint32_t).
/// Only the declaration is needed in this header: the classes below merely
/// store or pass values of this type.
/// NOTE(review): the enumerators are presumably defined in InstrProf.h --
/// confirm.
enum class HashT : uint32_t;

} // end namespace IndexedInstrProf
322 
323 /// Trait for lookups into the on-disk hash table for the binary instrprof
324 /// format.
325 class InstrProfLookupTrait {
326   std::vector<NamedInstrProfRecord> DataBuffer;
327   IndexedInstrProf::HashT HashType;
328   unsigned FormatVersion;
329   // Endianness of the input value profile data.
330   // It should be LE by default, but can be changed
331   // for testing purpose.
332   support::endianness ValueProfDataEndianness = support::little;
333 
334 public:
335   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
336       : HashType(HashType), FormatVersion(FormatVersion) {}
337 
338   using data_type = ArrayRef<NamedInstrProfRecord>;
339 
340   using internal_key_type = StringRef;
341   using external_key_type = StringRef;
342   using hash_value_type = uint64_t;
343   using offset_type = uint64_t;
344 
345   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
346   static StringRef GetInternalKey(StringRef K) { return K; }
347   static StringRef GetExternalKey(StringRef K) { return K; }
348 
349   hash_value_type ComputeHash(StringRef K);
350 
351   static std::pair<offset_type, offset_type>
352   ReadKeyDataLength(const unsigned char *&D) {
353     using namespace support;
354 
355     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
356     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
357     return std::make_pair(KeyLen, DataLen);
358   }
359 
360   StringRef ReadKey(const unsigned char *D, offset_type N) {
361     return StringRef((const char *)D, N);
362   }
363 
364   bool readValueProfilingData(const unsigned char *&D,
365                               const unsigned char *const End);
366   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
367 
368   // Used for testing purpose only.
369   void setValueProfDataEndianness(support::endianness Endianness) {
370     ValueProfDataEndianness = Endianness;
371   }
372 };
373 
374 struct InstrProfReaderIndexBase {
375   virtual ~InstrProfReaderIndexBase() = default;
376 
377   // Read all the profile records with the same key pointed to the current
378   // iterator.
379   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
380 
381   // Read all the profile records with the key equal to FuncName
382   virtual Error getRecords(StringRef FuncName,
383                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
384   virtual void advanceToNextKey() = 0;
385   virtual bool atEnd() const = 0;
386   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
387   virtual uint64_t getVersion() const = 0;
388   virtual bool isIRLevelProfile() const = 0;
389   virtual bool hasCSIRLevelProfile() const = 0;
390   virtual bool instrEntryBBEnabled() const = 0;
391   virtual Error populateSymtab(InstrProfSymtab &) = 0;
392 };
393 
/// On-disk iterable chained hash table whose lookups are driven by
/// InstrProfLookupTrait.
/// NOTE(review): presumably the HashTableImpl argument used for
/// InstrProfReaderIndex -- confirm in the implementation file.
using OnDiskHashTableImplV3 =
    OnDiskIterableChainedHashTable<InstrProfLookupTrait>;

/// Forward declaration only; InstrProfReaderIndex below grants this template
/// friend access.
template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;
399 
400 template <typename HashTableImpl>
401 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
402 private:
403   std::unique_ptr<HashTableImpl> HashTable;
404   typename HashTableImpl::data_iterator RecordIterator;
405   uint64_t FormatVersion;
406 
407   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
408 
409 public:
410   InstrProfReaderIndex(const unsigned char *Buckets,
411                        const unsigned char *const Payload,
412                        const unsigned char *const Base,
413                        IndexedInstrProf::HashT HashType, uint64_t Version);
414   ~InstrProfReaderIndex() override = default;
415 
416   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
417   Error getRecords(StringRef FuncName,
418                    ArrayRef<NamedInstrProfRecord> &Data) override;
419   void advanceToNextKey() override { RecordIterator++; }
420 
421   bool atEnd() const override {
422     return RecordIterator == HashTable->data_end();
423   }
424 
425   void setValueProfDataEndianness(support::endianness Endianness) override {
426     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
427   }
428 
429   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
430 
431   bool isIRLevelProfile() const override {
432     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
433   }
434 
435   bool hasCSIRLevelProfile() const override {
436     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
437   }
438 
439   bool instrEntryBBEnabled() const override {
440     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
441   }
442 
443   Error populateSymtab(InstrProfSymtab &Symtab) override {
444     return Symtab.create(HashTable->keys());
445   }
446 };
447 
448 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
449 class InstrProfReaderRemapper {
450 public:
451   virtual ~InstrProfReaderRemapper() {}
452   virtual Error populateRemappings() { return Error::success(); }
453   virtual Error getRecords(StringRef FuncName,
454                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
455 };
456 
457 /// Reader for the indexed binary instrprof format.
458 class IndexedInstrProfReader : public InstrProfReader {
459 private:
460   /// The profile data file contents.
461   std::unique_ptr<MemoryBuffer> DataBuffer;
462   /// The profile remapping file contents.
463   std::unique_ptr<MemoryBuffer> RemappingBuffer;
464   /// The index into the profile data.
465   std::unique_ptr<InstrProfReaderIndexBase> Index;
466   /// The profile remapping file contents.
467   std::unique_ptr<InstrProfReaderRemapper> Remapper;
468   /// Profile summary data.
469   std::unique_ptr<ProfileSummary> Summary;
470   /// Context sensitive profile summary data.
471   std::unique_ptr<ProfileSummary> CS_Summary;
472   // Index to the current record in the record array.
473   unsigned RecordIndex;
474 
475   // Read the profile summary. Return a pointer pointing to one byte past the
476   // end of the summary data if it exists or the input \c Cur.
477   // \c UseCS indicates whether to use the context-sensitive profile summary.
478   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
479                                    const unsigned char *Cur, bool UseCS);
480 
481 public:
482   IndexedInstrProfReader(
483       std::unique_ptr<MemoryBuffer> DataBuffer,
484       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
485       : DataBuffer(std::move(DataBuffer)),
486         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
487   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
488   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
489 
490   /// Return the profile version.
491   uint64_t getVersion() const { return Index->getVersion(); }
492   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
493   bool hasCSIRLevelProfile() const override {
494     return Index->hasCSIRLevelProfile();
495   }
496 
497   bool instrEntryBBEnabled() const override {
498     return Index->instrEntryBBEnabled();
499   }
500 
501   /// Return true if the given buffer is in an indexed instrprof format.
502   static bool hasFormat(const MemoryBuffer &DataBuffer);
503 
504   /// Read the file header.
505   Error readHeader() override;
506   /// Read a single record.
507   Error readNextRecord(NamedInstrProfRecord &Record) override;
508 
509   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
510   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
511                                                uint64_t FuncHash);
512 
513   /// Fill Counts with the profile data for the given function name.
514   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
515                           std::vector<uint64_t> &Counts);
516 
517   /// Return the maximum of all known function counts.
518   /// \c UseCS indicates whether to use the context-sensitive count.
519   uint64_t getMaximumFunctionCount(bool UseCS) {
520     if (UseCS) {
521       assert(CS_Summary && "No context sensitive profile summary");
522       return CS_Summary->getMaxFunctionCount();
523     } else {
524       assert(Summary && "No profile summary");
525       return Summary->getMaxFunctionCount();
526     }
527   }
528 
529   /// Factory method to create an indexed reader.
530   static Expected<std::unique_ptr<IndexedInstrProfReader>>
531   create(const Twine &Path, const Twine &RemappingPath = "");
532 
533   static Expected<std::unique_ptr<IndexedInstrProfReader>>
534   create(std::unique_ptr<MemoryBuffer> Buffer,
535          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
536 
537   // Used for testing purpose only.
538   void setValueProfDataEndianness(support::endianness Endianness) {
539     Index->setValueProfDataEndianness(Endianness);
540   }
541 
542   // See description in the base class. This interface is designed
543   // to be used by llvm-profdata (for dumping). Avoid using this when
544   // the client is the compiler.
545   InstrProfSymtab &getSymtab() override;
546 
547   /// Return the profile summary.
548   /// \c UseCS indicates whether to use the context-sensitive summary.
549   ProfileSummary &getSummary(bool UseCS) {
550     if (UseCS) {
551       assert(CS_Summary && "No context sensitive summary");
552       return *(CS_Summary.get());
553     } else {
554       assert(Summary && "No profile summary");
555       return *(Summary.get());
556     }
557   }
558 };
559 
560 } // end namespace llvm
561 
562 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
563