1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for instrumentation 10 // based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H 15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/IR/ProfileSummary.h" 20 #include "llvm/ProfileData/InstrProf.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/LineIterator.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/OnDiskHashTable.h" 26 #include "llvm/Support/SwapByteOrder.h" 27 #include <algorithm> 28 #include <cassert> 29 #include <cstddef> 30 #include <cstdint> 31 #include <iterator> 32 #include <memory> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class InstrProfReader; 39 40 /// A file format agnostic iterator over profiling data. 41 class InstrProfIterator : public std::iterator<std::input_iterator_tag, 42 NamedInstrProfRecord> { 43 InstrProfReader *Reader = nullptr; 44 value_type Record; 45 46 void Increment(); 47 48 public: 49 InstrProfIterator() = default; 50 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } 51 52 InstrProfIterator &operator++() { Increment(); return *this; } 53 bool operator==(const InstrProfIterator &RHS) const { 54 return Reader == RHS.Reader; 55 } 56 bool operator!=(const InstrProfIterator &RHS) const { 57 return Reader != RHS.Reader; 58 } 59 value_type &operator*() { return Record; } 60 value_type *operator->() { return &Record; } 61 }; 62 63 /// Base class and interface for reading profiling data of any known instrprof 64 /// format. Provides an iterator over NamedInstrProfRecords. 65 class InstrProfReader { 66 instrprof_error LastError = instrprof_error::success; 67 68 public: 69 InstrProfReader() = default; 70 virtual ~InstrProfReader() = default; 71 72 /// Read the header. Required before reading first record. 73 virtual Error readHeader() = 0; 74 75 /// Read a single record. 76 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; 77 78 /// Iterator over profile data. 79 InstrProfIterator begin() { return InstrProfIterator(this); } 80 InstrProfIterator end() { return InstrProfIterator(); } 81 82 virtual bool isIRLevelProfile() const = 0; 83 84 virtual bool hasCSIRLevelProfile() const = 0; 85 86 virtual bool instrEntryBBEnabled() const = 0; 87 88 /// Return the PGO symtab. There are three different readers: 89 /// Raw, Text, and Indexed profile readers. The first two types 90 /// of readers are used only by llvm-profdata tool, while the indexed 91 /// profile reader is also used by llvm-cov tool and the compiler ( 92 /// backend or frontend). Since creating PGO symtab can create 93 /// significant runtime and memory overhead (as it touches data 94 /// for the whole program), InstrProfSymtab for the indexed profile 95 /// reader should be created on demand and it is recommended to be 96 /// only used for dumping purpose with llvm-proftool, not with the 97 /// compiler. 98 virtual InstrProfSymtab &getSymtab() = 0; 99 100 /// Compute the sum of counts and return in Sum. 101 void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); 102 103 protected: 104 std::unique_ptr<InstrProfSymtab> Symtab; 105 106 /// Set the current error and return same. 107 Error error(instrprof_error Err) { 108 LastError = Err; 109 if (Err == instrprof_error::success) 110 return Error::success(); 111 return make_error<InstrProfError>(Err); 112 } 113 114 Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } 115 116 /// Clear the current error and return a successful one. 117 Error success() { return error(instrprof_error::success); } 118 119 public: 120 /// Return true if the reader has finished reading the profile data. 121 bool isEOF() { return LastError == instrprof_error::eof; } 122 123 /// Return true if the reader encountered an error reading profiling data. 124 bool hasError() { return LastError != instrprof_error::success && !isEOF(); } 125 126 /// Get the current error. 127 Error getError() { 128 if (hasError()) 129 return make_error<InstrProfError>(LastError); 130 return Error::success(); 131 } 132 133 /// Factory method to create an appropriately typed reader for the given 134 /// instrprof file. 135 static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path); 136 137 static Expected<std::unique_ptr<InstrProfReader>> 138 create(std::unique_ptr<MemoryBuffer> Buffer); 139 }; 140 141 /// Reader for the simple text based instrprof format. 142 /// 143 /// This format is a simple text format that's suitable for test data. Records 144 /// are separated by one or more blank lines, and record fields are separated by 145 /// new lines. 146 /// 147 /// Each record consists of a function name, a function hash, a number of 148 /// counters, and then each counter value, in that order. 149 class TextInstrProfReader : public InstrProfReader { 150 private: 151 /// The profile data file contents. 152 std::unique_ptr<MemoryBuffer> DataBuffer; 153 /// Iterator over the profile data. 154 line_iterator Line; 155 bool IsIRLevelProfile = false; 156 bool HasCSIRLevelProfile = false; 157 bool InstrEntryBBEnabled = false; 158 159 Error readValueProfileData(InstrProfRecord &Record); 160 161 public: 162 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) 163 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} 164 TextInstrProfReader(const TextInstrProfReader &) = delete; 165 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; 166 167 /// Return true if the given buffer is in text instrprof format. 168 static bool hasFormat(const MemoryBuffer &Buffer); 169 170 bool isIRLevelProfile() const override { return IsIRLevelProfile; } 171 172 bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } 173 174 bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; } 175 176 /// Read the header. 177 Error readHeader() override; 178 179 /// Read a single record. 180 Error readNextRecord(NamedInstrProfRecord &Record) override; 181 182 InstrProfSymtab &getSymtab() override { 183 assert(Symtab.get()); 184 return *Symtab.get(); 185 } 186 }; 187 188 /// Reader for the raw instrprof binary format from runtime. 189 /// 190 /// This format is a raw memory dump of the instrumentation-baed profiling data 191 /// from the runtime. It has no index. 192 /// 193 /// Templated on the unsigned type whose size matches pointers on the platform 194 /// that wrote the profile. 195 template <class IntPtrT> 196 class RawInstrProfReader : public InstrProfReader { 197 private: 198 /// The profile data file contents. 199 std::unique_ptr<MemoryBuffer> DataBuffer; 200 bool ShouldSwapBytes; 201 // The value of the version field of the raw profile data header. The lower 56 202 // bits specifies the format version and the most significant 8 bits specify 203 // the variant types of the profile. 204 uint64_t Version; 205 uint64_t CountersDelta; 206 uint64_t NamesDelta; 207 const RawInstrProf::ProfileData<IntPtrT> *Data; 208 const RawInstrProf::ProfileData<IntPtrT> *DataEnd; 209 const uint64_t *CountersStart; 210 const char *NamesStart; 211 uint64_t NamesSize; 212 // After value profile is all read, this pointer points to 213 // the header of next profile data (if exists) 214 const uint8_t *ValueDataStart; 215 uint32_t ValueKindLast; 216 uint32_t CurValueDataSize; 217 218 public: 219 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) 220 : DataBuffer(std::move(DataBuffer)) {} 221 RawInstrProfReader(const RawInstrProfReader &) = delete; 222 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; 223 224 static bool hasFormat(const MemoryBuffer &DataBuffer); 225 Error readHeader() override; 226 Error readNextRecord(NamedInstrProfRecord &Record) override; 227 228 bool isIRLevelProfile() const override { 229 return (Version & VARIANT_MASK_IR_PROF) != 0; 230 } 231 232 bool hasCSIRLevelProfile() const override { 233 return (Version & VARIANT_MASK_CSIR_PROF) != 0; 234 } 235 236 bool instrEntryBBEnabled() const override { 237 return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; 238 } 239 240 InstrProfSymtab &getSymtab() override { 241 assert(Symtab.get()); 242 return *Symtab.get(); 243 } 244 245 private: 246 Error createSymtab(InstrProfSymtab &Symtab); 247 Error readNextHeader(const char *CurrentPos); 248 Error readHeader(const RawInstrProf::Header &Header); 249 250 template <class IntT> IntT swap(IntT Int) const { 251 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; 252 } 253 254 support::endianness getDataEndianness() const { 255 support::endianness HostEndian = getHostEndianness(); 256 if (!ShouldSwapBytes) 257 return HostEndian; 258 if (HostEndian == support::little) 259 return support::big; 260 else 261 return support::little; 262 } 263 264 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { 265 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); 266 } 267 268 Error readName(NamedInstrProfRecord &Record); 269 Error readFuncHash(NamedInstrProfRecord &Record); 270 Error readRawCounts(InstrProfRecord &Record); 271 Error readValueProfilingData(InstrProfRecord &Record); 272 bool atEnd() const { return Data == DataEnd; } 273 274 void advanceData() { 275 Data++; 276 ValueDataStart += CurValueDataSize; 277 } 278 279 const char *getNextHeaderPos() const { 280 assert(atEnd()); 281 return (const char *)ValueDataStart; 282 } 283 284 /// Get the offset of \p CounterPtr from the start of the counters section of 285 /// the profile. The offset has units of "number of counters", i.e. increasing 286 /// the offset by 1 corresponds to an increase in the *byte offset* by 8. 287 ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { 288 return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); 289 } 290 291 const uint64_t *getCounter(ptrdiff_t Offset) const { 292 return CountersStart + Offset; 293 } 294 295 StringRef getName(uint64_t NameRef) const { 296 return Symtab->getFuncName(swap(NameRef)); 297 } 298 }; 299 300 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; 301 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; 302 303 namespace IndexedInstrProf { 304 305 enum class HashT : uint32_t; 306 307 } // end namespace IndexedInstrProf 308 309 /// Trait for lookups into the on-disk hash table for the binary instrprof 310 /// format. 311 class InstrProfLookupTrait { 312 std::vector<NamedInstrProfRecord> DataBuffer; 313 IndexedInstrProf::HashT HashType; 314 unsigned FormatVersion; 315 // Endianness of the input value profile data. 316 // It should be LE by default, but can be changed 317 // for testing purpose. 318 support::endianness ValueProfDataEndianness = support::little; 319 320 public: 321 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) 322 : HashType(HashType), FormatVersion(FormatVersion) {} 323 324 using data_type = ArrayRef<NamedInstrProfRecord>; 325 326 using internal_key_type = StringRef; 327 using external_key_type = StringRef; 328 using hash_value_type = uint64_t; 329 using offset_type = uint64_t; 330 331 static bool EqualKey(StringRef A, StringRef B) { return A == B; } 332 static StringRef GetInternalKey(StringRef K) { return K; } 333 static StringRef GetExternalKey(StringRef K) { return K; } 334 335 hash_value_type ComputeHash(StringRef K); 336 337 static std::pair<offset_type, offset_type> 338 ReadKeyDataLength(const unsigned char *&D) { 339 using namespace support; 340 341 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); 342 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); 343 return std::make_pair(KeyLen, DataLen); 344 } 345 346 StringRef ReadKey(const unsigned char *D, offset_type N) { 347 return StringRef((const char *)D, N); 348 } 349 350 bool readValueProfilingData(const unsigned char *&D, 351 const unsigned char *const End); 352 data_type ReadData(StringRef K, const unsigned char *D, offset_type N); 353 354 // Used for testing purpose only. 355 void setValueProfDataEndianness(support::endianness Endianness) { 356 ValueProfDataEndianness = Endianness; 357 } 358 }; 359 360 struct InstrProfReaderIndexBase { 361 virtual ~InstrProfReaderIndexBase() = default; 362 363 // Read all the profile records with the same key pointed to the current 364 // iterator. 365 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; 366 367 // Read all the profile records with the key equal to FuncName 368 virtual Error getRecords(StringRef FuncName, 369 ArrayRef<NamedInstrProfRecord> &Data) = 0; 370 virtual void advanceToNextKey() = 0; 371 virtual bool atEnd() const = 0; 372 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; 373 virtual uint64_t getVersion() const = 0; 374 virtual bool isIRLevelProfile() const = 0; 375 virtual bool hasCSIRLevelProfile() const = 0; 376 virtual bool instrEntryBBEnabled() const = 0; 377 virtual Error populateSymtab(InstrProfSymtab &) = 0; 378 }; 379 380 using OnDiskHashTableImplV3 = 381 OnDiskIterableChainedHashTable<InstrProfLookupTrait>; 382 383 template <typename HashTableImpl> 384 class InstrProfReaderItaniumRemapper; 385 386 template <typename HashTableImpl> 387 class InstrProfReaderIndex : public InstrProfReaderIndexBase { 388 private: 389 std::unique_ptr<HashTableImpl> HashTable; 390 typename HashTableImpl::data_iterator RecordIterator; 391 uint64_t FormatVersion; 392 393 friend class InstrProfReaderItaniumRemapper<HashTableImpl>; 394 395 public: 396 InstrProfReaderIndex(const unsigned char *Buckets, 397 const unsigned char *const Payload, 398 const unsigned char *const Base, 399 IndexedInstrProf::HashT HashType, uint64_t Version); 400 ~InstrProfReaderIndex() override = default; 401 402 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; 403 Error getRecords(StringRef FuncName, 404 ArrayRef<NamedInstrProfRecord> &Data) override; 405 void advanceToNextKey() override { RecordIterator++; } 406 407 bool atEnd() const override { 408 return RecordIterator == HashTable->data_end(); 409 } 410 411 void setValueProfDataEndianness(support::endianness Endianness) override { 412 HashTable->getInfoObj().setValueProfDataEndianness(Endianness); 413 } 414 415 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); } 416 417 bool isIRLevelProfile() const override { 418 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; 419 } 420 421 bool hasCSIRLevelProfile() const override { 422 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; 423 } 424 425 bool instrEntryBBEnabled() const override { 426 return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; 427 } 428 429 Error populateSymtab(InstrProfSymtab &Symtab) override { 430 return Symtab.create(HashTable->keys()); 431 } 432 }; 433 434 /// Name matcher supporting fuzzy matching of symbol names to names in profiles. 435 class InstrProfReaderRemapper { 436 public: 437 virtual ~InstrProfReaderRemapper() {} 438 virtual Error populateRemappings() { return Error::success(); } 439 virtual Error getRecords(StringRef FuncName, 440 ArrayRef<NamedInstrProfRecord> &Data) = 0; 441 }; 442 443 /// Reader for the indexed binary instrprof format. 444 class IndexedInstrProfReader : public InstrProfReader { 445 private: 446 /// The profile data file contents. 447 std::unique_ptr<MemoryBuffer> DataBuffer; 448 /// The profile remapping file contents. 449 std::unique_ptr<MemoryBuffer> RemappingBuffer; 450 /// The index into the profile data. 451 std::unique_ptr<InstrProfReaderIndexBase> Index; 452 /// The profile remapping file contents. 453 std::unique_ptr<InstrProfReaderRemapper> Remapper; 454 /// Profile summary data. 455 std::unique_ptr<ProfileSummary> Summary; 456 /// Context sensitive profile summary data. 457 std::unique_ptr<ProfileSummary> CS_Summary; 458 // Index to the current record in the record array. 459 unsigned RecordIndex; 460 461 // Read the profile summary. Return a pointer pointing to one byte past the 462 // end of the summary data if it exists or the input \c Cur. 463 // \c UseCS indicates whether to use the context-sensitive profile summary. 464 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, 465 const unsigned char *Cur, bool UseCS); 466 467 public: 468 IndexedInstrProfReader( 469 std::unique_ptr<MemoryBuffer> DataBuffer, 470 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) 471 : DataBuffer(std::move(DataBuffer)), 472 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} 473 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; 474 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; 475 476 /// Return the profile version. 477 uint64_t getVersion() const { return Index->getVersion(); } 478 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } 479 bool hasCSIRLevelProfile() const override { 480 return Index->hasCSIRLevelProfile(); 481 } 482 483 bool instrEntryBBEnabled() const override { 484 return Index->instrEntryBBEnabled(); 485 } 486 487 /// Return true if the given buffer is in an indexed instrprof format. 488 static bool hasFormat(const MemoryBuffer &DataBuffer); 489 490 /// Read the file header. 491 Error readHeader() override; 492 /// Read a single record. 493 Error readNextRecord(NamedInstrProfRecord &Record) override; 494 495 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash 496 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, 497 uint64_t FuncHash); 498 499 /// Fill Counts with the profile data for the given function name. 500 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, 501 std::vector<uint64_t> &Counts); 502 503 /// Return the maximum of all known function counts. 504 /// \c UseCS indicates whether to use the context-sensitive count. 505 uint64_t getMaximumFunctionCount(bool UseCS) { 506 if (UseCS) { 507 assert(CS_Summary && "No context sensitive profile summary"); 508 return CS_Summary->getMaxFunctionCount(); 509 } else { 510 assert(Summary && "No profile summary"); 511 return Summary->getMaxFunctionCount(); 512 } 513 } 514 515 /// Factory method to create an indexed reader. 516 static Expected<std::unique_ptr<IndexedInstrProfReader>> 517 create(const Twine &Path, const Twine &RemappingPath = ""); 518 519 static Expected<std::unique_ptr<IndexedInstrProfReader>> 520 create(std::unique_ptr<MemoryBuffer> Buffer, 521 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr); 522 523 // Used for testing purpose only. 524 void setValueProfDataEndianness(support::endianness Endianness) { 525 Index->setValueProfDataEndianness(Endianness); 526 } 527 528 // See description in the base class. This interface is designed 529 // to be used by llvm-profdata (for dumping). Avoid using this when 530 // the client is the compiler. 531 InstrProfSymtab &getSymtab() override; 532 533 /// Return the profile summary. 534 /// \c UseCS indicates whether to use the context-sensitive summary. 535 ProfileSummary &getSummary(bool UseCS) { 536 if (UseCS) { 537 assert(CS_Summary && "No context sensitive summary"); 538 return *(CS_Summary.get()); 539 } else { 540 assert(Summary && "No profile summary"); 541 return *(Summary.get()); 542 } 543 } 544 }; 545 546 } // end namespace llvm 547 548 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H 549