1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for instrumentation 10 // based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H 15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/IR/ProfileSummary.h" 20 #include "llvm/ProfileData/InstrProf.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/LineIterator.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/OnDiskHashTable.h" 26 #include "llvm/Support/SwapByteOrder.h" 27 #include <algorithm> 28 #include <cassert> 29 #include <cstddef> 30 #include <cstdint> 31 #include <iterator> 32 #include <memory> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class InstrProfReader; 39 40 /// A file format agnostic iterator over profiling data. 41 class InstrProfIterator { 42 public: 43 using iterator_category = std::input_iterator_tag; 44 using value_type = NamedInstrProfRecord; 45 using difference_type = std::ptrdiff_t; 46 using pointer = value_type *; 47 using reference = value_type &; 48 49 private: 50 InstrProfReader *Reader = nullptr; 51 value_type Record; 52 53 void Increment(); 54 55 public: 56 InstrProfIterator() = default; 57 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } 58 59 InstrProfIterator &operator++() { Increment(); return *this; } 60 bool operator==(const InstrProfIterator &RHS) const { 61 return Reader == RHS.Reader; 62 } 63 bool operator!=(const InstrProfIterator &RHS) const { 64 return Reader != RHS.Reader; 65 } 66 value_type &operator*() { return Record; } 67 value_type *operator->() { return &Record; } 68 }; 69 70 /// Base class and interface for reading profiling data of any known instrprof 71 /// format. Provides an iterator over NamedInstrProfRecords. 72 class InstrProfReader { 73 instrprof_error LastError = instrprof_error::success; 74 75 public: 76 InstrProfReader() = default; 77 virtual ~InstrProfReader() = default; 78 79 /// Read the header. Required before reading first record. 80 virtual Error readHeader() = 0; 81 82 /// Read a single record. 83 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; 84 85 /// Print binary ids on stream OS. 86 virtual Error printBinaryIds(raw_ostream &OS) { return success(); }; 87 88 /// Iterator over profile data. 89 InstrProfIterator begin() { return InstrProfIterator(this); } 90 InstrProfIterator end() { return InstrProfIterator(); } 91 92 virtual bool isIRLevelProfile() const = 0; 93 94 virtual bool hasCSIRLevelProfile() const = 0; 95 96 virtual bool instrEntryBBEnabled() const = 0; 97 98 /// Return the PGO symtab. There are three different readers: 99 /// Raw, Text, and Indexed profile readers. The first two types 100 /// of readers are used only by llvm-profdata tool, while the indexed 101 /// profile reader is also used by llvm-cov tool and the compiler ( 102 /// backend or frontend). Since creating PGO symtab can create 103 /// significant runtime and memory overhead (as it touches data 104 /// for the whole program), InstrProfSymtab for the indexed profile 105 /// reader should be created on demand and it is recommended to be 106 /// only used for dumping purpose with llvm-proftool, not with the 107 /// compiler. 108 virtual InstrProfSymtab &getSymtab() = 0; 109 110 /// Compute the sum of counts and return in Sum. 111 void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); 112 113 protected: 114 std::unique_ptr<InstrProfSymtab> Symtab; 115 116 /// Set the current error and return same. 117 Error error(instrprof_error Err) { 118 LastError = Err; 119 if (Err == instrprof_error::success) 120 return Error::success(); 121 return make_error<InstrProfError>(Err); 122 } 123 124 Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } 125 126 /// Clear the current error and return a successful one. 127 Error success() { return error(instrprof_error::success); } 128 129 public: 130 /// Return true if the reader has finished reading the profile data. 131 bool isEOF() { return LastError == instrprof_error::eof; } 132 133 /// Return true if the reader encountered an error reading profiling data. 134 bool hasError() { return LastError != instrprof_error::success && !isEOF(); } 135 136 /// Get the current error. 137 Error getError() { 138 if (hasError()) 139 return make_error<InstrProfError>(LastError); 140 return Error::success(); 141 } 142 143 /// Factory method to create an appropriately typed reader for the given 144 /// instrprof file. 145 static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path); 146 147 static Expected<std::unique_ptr<InstrProfReader>> 148 create(std::unique_ptr<MemoryBuffer> Buffer); 149 }; 150 151 /// Reader for the simple text based instrprof format. 152 /// 153 /// This format is a simple text format that's suitable for test data. Records 154 /// are separated by one or more blank lines, and record fields are separated by 155 /// new lines. 156 /// 157 /// Each record consists of a function name, a function hash, a number of 158 /// counters, and then each counter value, in that order. 159 class TextInstrProfReader : public InstrProfReader { 160 private: 161 /// The profile data file contents. 162 std::unique_ptr<MemoryBuffer> DataBuffer; 163 /// Iterator over the profile data. 164 line_iterator Line; 165 bool IsIRLevelProfile = false; 166 bool HasCSIRLevelProfile = false; 167 bool InstrEntryBBEnabled = false; 168 169 Error readValueProfileData(InstrProfRecord &Record); 170 171 public: 172 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) 173 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} 174 TextInstrProfReader(const TextInstrProfReader &) = delete; 175 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; 176 177 /// Return true if the given buffer is in text instrprof format. 178 static bool hasFormat(const MemoryBuffer &Buffer); 179 180 bool isIRLevelProfile() const override { return IsIRLevelProfile; } 181 182 bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } 183 184 bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; } 185 186 /// Read the header. 187 Error readHeader() override; 188 189 /// Read a single record. 190 Error readNextRecord(NamedInstrProfRecord &Record) override; 191 192 InstrProfSymtab &getSymtab() override { 193 assert(Symtab.get()); 194 return *Symtab.get(); 195 } 196 }; 197 198 /// Reader for the raw instrprof binary format from runtime. 199 /// 200 /// This format is a raw memory dump of the instrumentation-baed profiling data 201 /// from the runtime. It has no index. 202 /// 203 /// Templated on the unsigned type whose size matches pointers on the platform 204 /// that wrote the profile. 205 template <class IntPtrT> 206 class RawInstrProfReader : public InstrProfReader { 207 private: 208 /// The profile data file contents. 209 std::unique_ptr<MemoryBuffer> DataBuffer; 210 bool ShouldSwapBytes; 211 // The value of the version field of the raw profile data header. The lower 56 212 // bits specifies the format version and the most significant 8 bits specify 213 // the variant types of the profile. 214 uint64_t Version; 215 uint64_t CountersDelta; 216 uint64_t NamesDelta; 217 const RawInstrProf::ProfileData<IntPtrT> *Data; 218 const RawInstrProf::ProfileData<IntPtrT> *DataEnd; 219 const uint64_t *CountersStart; 220 const char *NamesStart; 221 uint64_t NamesSize; 222 // After value profile is all read, this pointer points to 223 // the header of next profile data (if exists) 224 const uint8_t *ValueDataStart; 225 uint32_t ValueKindLast; 226 uint32_t CurValueDataSize; 227 228 uint64_t BinaryIdsSize; 229 const uint8_t *BinaryIdsStart; 230 231 public: 232 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) 233 : DataBuffer(std::move(DataBuffer)) {} 234 RawInstrProfReader(const RawInstrProfReader &) = delete; 235 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; 236 237 static bool hasFormat(const MemoryBuffer &DataBuffer); 238 Error readHeader() override; 239 Error readNextRecord(NamedInstrProfRecord &Record) override; 240 Error printBinaryIds(raw_ostream &OS) override; 241 242 bool isIRLevelProfile() const override { 243 return (Version & VARIANT_MASK_IR_PROF) != 0; 244 } 245 246 bool hasCSIRLevelProfile() const override { 247 return (Version & VARIANT_MASK_CSIR_PROF) != 0; 248 } 249 250 bool instrEntryBBEnabled() const override { 251 return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; 252 } 253 254 InstrProfSymtab &getSymtab() override { 255 assert(Symtab.get()); 256 return *Symtab.get(); 257 } 258 259 private: 260 Error createSymtab(InstrProfSymtab &Symtab); 261 Error readNextHeader(const char *CurrentPos); 262 Error readHeader(const RawInstrProf::Header &Header); 263 264 template <class IntT> IntT swap(IntT Int) const { 265 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; 266 } 267 268 support::endianness getDataEndianness() const { 269 support::endianness HostEndian = getHostEndianness(); 270 if (!ShouldSwapBytes) 271 return HostEndian; 272 if (HostEndian == support::little) 273 return support::big; 274 else 275 return support::little; 276 } 277 278 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { 279 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); 280 } 281 282 Error readName(NamedInstrProfRecord &Record); 283 Error readFuncHash(NamedInstrProfRecord &Record); 284 Error readRawCounts(InstrProfRecord &Record); 285 Error readValueProfilingData(InstrProfRecord &Record); 286 bool atEnd() const { return Data == DataEnd; } 287 288 void advanceData() { 289 Data++; 290 ValueDataStart += CurValueDataSize; 291 } 292 293 const char *getNextHeaderPos() const { 294 assert(atEnd()); 295 return (const char *)ValueDataStart; 296 } 297 298 /// Get the offset of \p CounterPtr from the start of the counters section of 299 /// the profile. The offset has units of "number of counters", i.e. increasing 300 /// the offset by 1 corresponds to an increase in the *byte offset* by 8. 301 ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { 302 return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); 303 } 304 305 const uint64_t *getCounter(ptrdiff_t Offset) const { 306 return CountersStart + Offset; 307 } 308 309 StringRef getName(uint64_t NameRef) const { 310 return Symtab->getFuncName(swap(NameRef)); 311 } 312 }; 313 314 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; 315 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; 316 317 namespace IndexedInstrProf { 318 319 enum class HashT : uint32_t; 320 321 } // end namespace IndexedInstrProf 322 323 /// Trait for lookups into the on-disk hash table for the binary instrprof 324 /// format. 325 class InstrProfLookupTrait { 326 std::vector<NamedInstrProfRecord> DataBuffer; 327 IndexedInstrProf::HashT HashType; 328 unsigned FormatVersion; 329 // Endianness of the input value profile data. 330 // It should be LE by default, but can be changed 331 // for testing purpose. 332 support::endianness ValueProfDataEndianness = support::little; 333 334 public: 335 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) 336 : HashType(HashType), FormatVersion(FormatVersion) {} 337 338 using data_type = ArrayRef<NamedInstrProfRecord>; 339 340 using internal_key_type = StringRef; 341 using external_key_type = StringRef; 342 using hash_value_type = uint64_t; 343 using offset_type = uint64_t; 344 345 static bool EqualKey(StringRef A, StringRef B) { return A == B; } 346 static StringRef GetInternalKey(StringRef K) { return K; } 347 static StringRef GetExternalKey(StringRef K) { return K; } 348 349 hash_value_type ComputeHash(StringRef K); 350 351 static std::pair<offset_type, offset_type> 352 ReadKeyDataLength(const unsigned char *&D) { 353 using namespace support; 354 355 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); 356 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); 357 return std::make_pair(KeyLen, DataLen); 358 } 359 360 StringRef ReadKey(const unsigned char *D, offset_type N) { 361 return StringRef((const char *)D, N); 362 } 363 364 bool readValueProfilingData(const unsigned char *&D, 365 const unsigned char *const End); 366 data_type ReadData(StringRef K, const unsigned char *D, offset_type N); 367 368 // Used for testing purpose only. 369 void setValueProfDataEndianness(support::endianness Endianness) { 370 ValueProfDataEndianness = Endianness; 371 } 372 }; 373 374 struct InstrProfReaderIndexBase { 375 virtual ~InstrProfReaderIndexBase() = default; 376 377 // Read all the profile records with the same key pointed to the current 378 // iterator. 379 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; 380 381 // Read all the profile records with the key equal to FuncName 382 virtual Error getRecords(StringRef FuncName, 383 ArrayRef<NamedInstrProfRecord> &Data) = 0; 384 virtual void advanceToNextKey() = 0; 385 virtual bool atEnd() const = 0; 386 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; 387 virtual uint64_t getVersion() const = 0; 388 virtual bool isIRLevelProfile() const = 0; 389 virtual bool hasCSIRLevelProfile() const = 0; 390 virtual bool instrEntryBBEnabled() const = 0; 391 virtual Error populateSymtab(InstrProfSymtab &) = 0; 392 }; 393 394 using OnDiskHashTableImplV3 = 395 OnDiskIterableChainedHashTable<InstrProfLookupTrait>; 396 397 template <typename HashTableImpl> 398 class InstrProfReaderItaniumRemapper; 399 400 template <typename HashTableImpl> 401 class InstrProfReaderIndex : public InstrProfReaderIndexBase { 402 private: 403 std::unique_ptr<HashTableImpl> HashTable; 404 typename HashTableImpl::data_iterator RecordIterator; 405 uint64_t FormatVersion; 406 407 friend class InstrProfReaderItaniumRemapper<HashTableImpl>; 408 409 public: 410 InstrProfReaderIndex(const unsigned char *Buckets, 411 const unsigned char *const Payload, 412 const unsigned char *const Base, 413 IndexedInstrProf::HashT HashType, uint64_t Version); 414 ~InstrProfReaderIndex() override = default; 415 416 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; 417 Error getRecords(StringRef FuncName, 418 ArrayRef<NamedInstrProfRecord> &Data) override; 419 void advanceToNextKey() override { RecordIterator++; } 420 421 bool atEnd() const override { 422 return RecordIterator == HashTable->data_end(); 423 } 424 425 void setValueProfDataEndianness(support::endianness Endianness) override { 426 HashTable->getInfoObj().setValueProfDataEndianness(Endianness); 427 } 428 429 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); } 430 431 bool isIRLevelProfile() const override { 432 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; 433 } 434 435 bool hasCSIRLevelProfile() const override { 436 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; 437 } 438 439 bool instrEntryBBEnabled() const override { 440 return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; 441 } 442 443 Error populateSymtab(InstrProfSymtab &Symtab) override { 444 return Symtab.create(HashTable->keys()); 445 } 446 }; 447 448 /// Name matcher supporting fuzzy matching of symbol names to names in profiles. 449 class InstrProfReaderRemapper { 450 public: 451 virtual ~InstrProfReaderRemapper() {} 452 virtual Error populateRemappings() { return Error::success(); } 453 virtual Error getRecords(StringRef FuncName, 454 ArrayRef<NamedInstrProfRecord> &Data) = 0; 455 }; 456 457 /// Reader for the indexed binary instrprof format. 458 class IndexedInstrProfReader : public InstrProfReader { 459 private: 460 /// The profile data file contents. 461 std::unique_ptr<MemoryBuffer> DataBuffer; 462 /// The profile remapping file contents. 463 std::unique_ptr<MemoryBuffer> RemappingBuffer; 464 /// The index into the profile data. 465 std::unique_ptr<InstrProfReaderIndexBase> Index; 466 /// The profile remapping file contents. 467 std::unique_ptr<InstrProfReaderRemapper> Remapper; 468 /// Profile summary data. 469 std::unique_ptr<ProfileSummary> Summary; 470 /// Context sensitive profile summary data. 471 std::unique_ptr<ProfileSummary> CS_Summary; 472 // Index to the current record in the record array. 473 unsigned RecordIndex; 474 475 // Read the profile summary. Return a pointer pointing to one byte past the 476 // end of the summary data if it exists or the input \c Cur. 477 // \c UseCS indicates whether to use the context-sensitive profile summary. 478 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, 479 const unsigned char *Cur, bool UseCS); 480 481 public: 482 IndexedInstrProfReader( 483 std::unique_ptr<MemoryBuffer> DataBuffer, 484 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) 485 : DataBuffer(std::move(DataBuffer)), 486 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} 487 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; 488 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; 489 490 /// Return the profile version. 491 uint64_t getVersion() const { return Index->getVersion(); } 492 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } 493 bool hasCSIRLevelProfile() const override { 494 return Index->hasCSIRLevelProfile(); 495 } 496 497 bool instrEntryBBEnabled() const override { 498 return Index->instrEntryBBEnabled(); 499 } 500 501 /// Return true if the given buffer is in an indexed instrprof format. 502 static bool hasFormat(const MemoryBuffer &DataBuffer); 503 504 /// Read the file header. 505 Error readHeader() override; 506 /// Read a single record. 507 Error readNextRecord(NamedInstrProfRecord &Record) override; 508 509 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash 510 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, 511 uint64_t FuncHash); 512 513 /// Fill Counts with the profile data for the given function name. 514 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, 515 std::vector<uint64_t> &Counts); 516 517 /// Return the maximum of all known function counts. 518 /// \c UseCS indicates whether to use the context-sensitive count. 519 uint64_t getMaximumFunctionCount(bool UseCS) { 520 if (UseCS) { 521 assert(CS_Summary && "No context sensitive profile summary"); 522 return CS_Summary->getMaxFunctionCount(); 523 } else { 524 assert(Summary && "No profile summary"); 525 return Summary->getMaxFunctionCount(); 526 } 527 } 528 529 /// Factory method to create an indexed reader. 530 static Expected<std::unique_ptr<IndexedInstrProfReader>> 531 create(const Twine &Path, const Twine &RemappingPath = ""); 532 533 static Expected<std::unique_ptr<IndexedInstrProfReader>> 534 create(std::unique_ptr<MemoryBuffer> Buffer, 535 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr); 536 537 // Used for testing purpose only. 538 void setValueProfDataEndianness(support::endianness Endianness) { 539 Index->setValueProfDataEndianness(Endianness); 540 } 541 542 // See description in the base class. This interface is designed 543 // to be used by llvm-profdata (for dumping). Avoid using this when 544 // the client is the compiler. 545 InstrProfSymtab &getSymtab() override; 546 547 /// Return the profile summary. 548 /// \c UseCS indicates whether to use the context-sensitive summary. 549 ProfileSummary &getSummary(bool UseCS) { 550 if (UseCS) { 551 assert(CS_Summary && "No context sensitive summary"); 552 return *(CS_Summary.get()); 553 } else { 554 assert(Summary && "No profile summary"); 555 return *(Summary.get()); 556 } 557 } 558 }; 559 560 } // end namespace llvm 561 562 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H 563