1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for reading profiling data for instrumentation
11 // based PGO and coverage.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/IR/ProfileSummary.h"
21 #include "llvm/ProfileData/InstrProf.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/LineIterator.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/OnDiskHashTable.h"
27 #include "llvm/Support/SwapByteOrder.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <iterator>
33 #include <memory>
34 #include <utility>
35 #include <vector>
36 
37 namespace llvm {
38 
39 class InstrProfReader;
40 
41 /// A file format agnostic iterator over profiling data.
42 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
43                                                NamedInstrProfRecord> {
44   InstrProfReader *Reader = nullptr;
45   value_type Record;
46 
47   void Increment();
48 
49 public:
50   InstrProfIterator() = default;
51   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
52 
53   InstrProfIterator &operator++() { Increment(); return *this; }
54   bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
55   bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
56   value_type &operator*() { return Record; }
57   value_type *operator->() { return &Record; }
58 };
59 
60 /// Base class and interface for reading profiling data of any known instrprof
61 /// format. Provides an iterator over NamedInstrProfRecords.
62 class InstrProfReader {
63   instrprof_error LastError = instrprof_error::success;
64 
65 public:
66   InstrProfReader() = default;
67   virtual ~InstrProfReader() = default;
68 
69   /// Read the header.  Required before reading first record.
70   virtual Error readHeader() = 0;
71 
72   /// Read a single record.
73   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
74 
75   /// Iterator over profile data.
76   InstrProfIterator begin() { return InstrProfIterator(this); }
77   InstrProfIterator end() { return InstrProfIterator(); }
78 
79   virtual bool isIRLevelProfile() const = 0;
80 
81   /// Return the PGO symtab. There are three different readers:
82   /// Raw, Text, and Indexed profile readers. The first two types
83   /// of readers are used only by llvm-profdata tool, while the indexed
84   /// profile reader is also used by llvm-cov tool and the compiler (
85   /// backend or frontend). Since creating PGO symtab can create
86   /// significant runtime and memory overhead (as it touches data
87   /// for the whole program), InstrProfSymtab for the indexed profile
88   /// reader should be created on demand and it is recommended to be
89   /// only used for dumping purpose with llvm-proftool, not with the
90   /// compiler.
getStreamer()91   virtual InstrProfSymtab &getSymtab() = 0;
92 
93 protected:
94   std::unique_ptr<InstrProfSymtab> Symtab;
95 
96   /// Set the current error and return same.
97   Error error(instrprof_error Err) {
98     LastError = Err;
99     if (Err == instrprof_error::success)
100       return Error::success();
101     return make_error<InstrProfError>(Err);
102   }
103 
104   Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
105 
106   /// Clear the current error and return a successful one.
107   Error success() { return error(instrprof_error::success); }
108 
109 public:
110   /// Return true if the reader has finished reading the profile data.
111   bool isEOF() { return LastError == instrprof_error::eof; }
112 
113   /// Return true if the reader encountered an error reading profiling data.
114   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
115 
116   /// Get the current error.
117   Error getError() {
118     if (hasError())
119       return make_error<InstrProfError>(LastError);
120     return Error::success();
121   }
122 
123   /// Factory method to create an appropriately typed reader for the given
124   /// instrprof file.
125   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
126 
127   static Expected<std::unique_ptr<InstrProfReader>>
128   create(std::unique_ptr<MemoryBuffer> Buffer);
129 };
130 
131 /// Reader for the simple text based instrprof format.
132 ///
133 /// This format is a simple text format that's suitable for test data. Records
134 /// are separated by one or more blank lines, and record fields are separated by
135 /// new lines.
136 ///
137 /// Each record consists of a function name, a function hash, a number of
138 /// counters, and then each counter value, in that order.
139 class TextInstrProfReader : public InstrProfReader {
140 private:
141   /// The profile data file contents.
142   std::unique_ptr<MemoryBuffer> DataBuffer;
143   /// Iterator over the profile data.
144   line_iterator Line;
145   bool IsIRLevelProfile = false;
146 
147   Error readValueProfileData(InstrProfRecord &Record);
148 
149 public:
150   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
151       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
152   TextInstrProfReader(const TextInstrProfReader &) = delete;
153   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
154 
155   /// Return true if the given buffer is in text instrprof format.
156   static bool hasFormat(const MemoryBuffer &Buffer);
157 
158   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
159 
160   /// Read the header.
161   Error readHeader() override;
162 
163   /// Read a single record.
164   Error readNextRecord(NamedInstrProfRecord &Record) override;
165 
166   InstrProfSymtab &getSymtab() override {
167     assert(Symtab.get());
168     return *Symtab.get();
169   }
170 };
171 
172 /// Reader for the raw instrprof binary format from runtime.
173 ///
174 /// This format is a raw memory dump of the instrumentation-baed profiling data
175 /// from the runtime.  It has no index.
176 ///
177 /// Templated on the unsigned type whose size matches pointers on the platform
178 /// that wrote the profile.
179 template <class IntPtrT>
180 class RawInstrProfReader : public InstrProfReader {
181 private:
182   /// The profile data file contents.
183   std::unique_ptr<MemoryBuffer> DataBuffer;
184   bool ShouldSwapBytes;
185   // The value of the version field of the raw profile data header. The lower 56
186   // bits specifies the format version and the most significant 8 bits specify
187   // the variant types of the profile.
188   uint64_t Version;
189   uint64_t CountersDelta;
190   uint64_t NamesDelta;
191   const RawInstrProf::ProfileData<IntPtrT> *Data;
192   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
193   const uint64_t *CountersStart;
194   const char *NamesStart;
195   uint64_t NamesSize;
196   // After value profile is all read, this pointer points to
197   // the header of next profile data (if exists)
198   const uint8_t *ValueDataStart;
199   uint32_t ValueKindLast;
200   uint32_t CurValueDataSize;
201 
202 public:
203   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
204       : DataBuffer(std::move(DataBuffer)) {}
205   RawInstrProfReader(const RawInstrProfReader &) = delete;
206   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
207 
208   static bool hasFormat(const MemoryBuffer &DataBuffer);
209   Error readHeader() override;
210   Error readNextRecord(NamedInstrProfRecord &Record) override;
211 
212   bool isIRLevelProfile() const override {
213     return (Version & VARIANT_MASK_IR_PROF) != 0;
214   }
215 
216   InstrProfSymtab &getSymtab() override {
217     assert(Symtab.get());
218     return *Symtab.get();
219   }
220 
221 private:
222   Error createSymtab(InstrProfSymtab &Symtab);
223   Error readNextHeader(const char *CurrentPos);
224   Error readHeader(const RawInstrProf::Header &Header);
225 
getCurrentWinFrameInfo()226   template <class IntT> IntT swap(IntT Int) const {
227     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
228   }
229 
230   support::endianness getDataEndianness() const {
231     support::endianness HostEndian = getHostEndianness();
232     if (!ShouldSwapBytes)
233       return HostEndian;
234     if (HostEndian == support::little)
235       return support::big;
236     else
237       return support::little;
238   }
239 
240   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
241     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
setTargetStreamer(MCTargetStreamer * TS)242   }
243 
244   Error readName(NamedInstrProfRecord &Record);
245   Error readFuncHash(NamedInstrProfRecord &Record);
246   Error readRawCounts(InstrProfRecord &Record);
247   Error readValueProfilingData(InstrProfRecord &Record);
248   bool atEnd() const { return Data == DataEnd; }
249 
getContext()250   void advanceData() {
251     Data++;
252     ValueDataStart += CurValueDataSize;
253   }
setUseAssemblerInfoForParsing(bool v)254 
255   const char *getNextHeaderPos() const {
256       assert(atEnd());
257       return (const char *)ValueDataStart;
258   }
259 
260   const uint64_t *getCounter(IntPtrT CounterPtr) const {
getNumFrameInfos()261     ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
getDwarfFrameInfos()262     return CountersStart + Offset;
263   }
264 
265   StringRef getName(uint64_t NameRef) const {
266     return Symtab->getFuncName(swap(NameRef));
267   }
getNumWinFrameInfos()268 };
getWinFrameInfos()269 
270 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
271 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
272 
namespace IndexedInstrProf {

/// Opaque declaration only: a scoped enumeration whose underlying type is
/// fixed to uint32_t. The enumerators are not listed in this header.
enum class HashT : uint32_t;

} // end namespace IndexedInstrProf
278 
279 /// Trait for lookups into the on-disk hash table for the binary instrprof
isVerboseAsm()280 /// format.
281 class InstrProfLookupTrait {
282   std::vector<NamedInstrProfRecord> DataBuffer;
283   IndexedInstrProf::HashT HashType;
284   unsigned FormatVersion;
285   // Endianness of the input value profile data.
286   // It should be LE by default, but can be changed
287   // for testing purpose.
288   support::endianness ValueProfDataEndianness = support::little;
289 
290 public:
291   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
292       : HashType(HashType), FormatVersion(FormatVersion) {}
293 
294   using data_type = ArrayRef<NamedInstrProfRecord>;
295 
296   using internal_key_type = StringRef;
297   using external_key_type = StringRef;
298   using hash_value_type = uint64_t;
299   using offset_type = uint64_t;
300 
301   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
302   static StringRef GetInternalKey(StringRef K) { return K; }
303   static StringRef GetExternalKey(StringRef K) { return K; }
304 
305   hash_value_type ComputeHash(StringRef K);
306 
307   static std::pair<offset_type, offset_type>
308   ReadKeyDataLength(const unsigned char *&D) {
309     using namespace support;
310 
311     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
312     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
313     return std::make_pair(KeyLen, DataLen);
314   }
315 
316   StringRef ReadKey(const unsigned char *D, offset_type N) {
317     return StringRef((const char *)D, N);
318   }
319 
320   bool readValueProfilingData(const unsigned char *&D,
321                               const unsigned char *const End);
322   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
323 
324   // Used for testing purpose only.
325   void setValueProfDataEndianness(support::endianness Endianness) {
326     ValueProfDataEndianness = Endianness;
327   }
328 };
329 
330 struct InstrProfReaderIndexBase {
331   virtual ~InstrProfReaderIndexBase() = default;
getCurrentSection()332 
333   // Read all the profile records with the same key pointed to the current
334   // iterator.
335   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
336 
getCurrentSectionOnly()337   // Read all the profile records with the key equal to FuncName
338   virtual Error getRecords(StringRef FuncName,
339                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
getPreviousSection()340   virtual void advanceToNextKey() = 0;
341   virtual bool atEnd() const = 0;
342   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
343   virtual uint64_t getVersion() const = 0;
344   virtual bool isIRLevelProfile() const = 0;
345   virtual Error populateSymtab(InstrProfSymtab &) = 0;
346 };
347 
GetSymbolOrder(const MCSymbol * Sym)348 using OnDiskHashTableImplV3 =
349     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
350 
351 template <typename HashTableImpl>
352 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
353 private:
354   std::unique_ptr<HashTableImpl> HashTable;
355   typename HashTableImpl::data_iterator RecordIterator;
356   uint64_t FormatVersion;
357 
358 public:
PushSection()359   InstrProfReaderIndex(const unsigned char *Buckets,
360                        const unsigned char *const Payload,
361                        const unsigned char *const Base,
362                        IndexedInstrProf::HashT HashType, uint64_t Version);
363   ~InstrProfReaderIndex() override = default;
364 
365   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
366   Error getRecords(StringRef FuncName,
367                    ArrayRef<NamedInstrProfRecord> &Data) override;
PopSection()368   void advanceToNextKey() override { RecordIterator++; }
369 
370   bool atEnd() const override {
371     return RecordIterator == HashTable->data_end();
372   }
373 
374   void setValueProfDataEndianness(support::endianness Endianness) override {
375     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
376   }
377 
378   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
379 
380   bool isIRLevelProfile() const override {
381     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
382   }
SubSection(const MCExpr * Subsection)383 
384   Error populateSymtab(InstrProfSymtab &Symtab) override {
385     return Symtab.create(HashTable->keys());
386   }
387 };
388 
389 /// Reader for the indexed binary instrprof format.
390 class IndexedInstrProfReader : public InstrProfReader {
391 private:
392   /// The profile data file contents.
393   std::unique_ptr<MemoryBuffer> DataBuffer;
394   /// The index into the profile data.
395   std::unique_ptr<InstrProfReaderIndexBase> Index;
396   /// Profile summary data.
397   std::unique_ptr<ProfileSummary> Summary;
398   // Index to the current record in the record array.
399   unsigned RecordIndex;
400 
401   // Read the profile summary. Return a pointer pointing to one byte past the
402   // end of the summary data if it exists or the input \c Cur.
403   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
404                                    const unsigned char *Cur);
405 
406 public:
407   IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
408       : DataBuffer(std::move(DataBuffer)), RecordIndex(0) {}
409   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
410   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
411 
412   /// Return the profile version.
413   uint64_t getVersion() const { return Index->getVersion(); }
414   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
415 
416   /// Return true if the given buffer is in an indexed instrprof format.
417   static bool hasFormat(const MemoryBuffer &DataBuffer);
418 
419   /// Read the file header.
420   Error readHeader() override;
421   /// Read a single record.
422   Error readNextRecord(NamedInstrProfRecord &Record) override;
423 
424   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
425   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
426                                                uint64_t FuncHash);
427 
428   /// Fill Counts with the profile data for the given function name.
429   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
430                           std::vector<uint64_t> &Counts);
431 
432   /// Return the maximum of all known function counts.
433   uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
434 
435   /// Factory method to create an indexed reader.
436   static Expected<std::unique_ptr<IndexedInstrProfReader>>
437   create(const Twine &Path);
438 
439   static Expected<std::unique_ptr<IndexedInstrProfReader>>
EmitLinkerOptions(ArrayRef<std::string> Kind)440   create(std::unique_ptr<MemoryBuffer> Buffer);
441 
442   // Used for testing purpose only.
EmitDataRegion(MCDataRegionType Kind)443   void setValueProfDataEndianness(support::endianness Endianness) {
444     Index->setValueProfDataEndianness(Endianness);
445   }
EmitVersionMin(MCVersionMinType Type,unsigned Major,unsigned Minor,unsigned Update)446 
447   // See description in the base class. This interface is designed
448   // to be used by llvm-profdata (for dumping). Avoid using this when
449   // the client is the compiler.
450   InstrProfSymtab &getSymtab() override;
EmitBuildVersion(unsigned Platform,unsigned Major,unsigned Minor,unsigned Update)451   ProfileSummary &getSummary() { return *(Summary.get()); }
452 };
453 
454 } // end namespace llvm
455 
456 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
457