1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading MemProf profiling data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include <algorithm> 14 #include <cstdint> 15 #include <memory> 16 #include <type_traits> 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 25 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 26 #include "llvm/Object/Binary.h" 27 #include "llvm/Object/ELFObjectFile.h" 28 #include "llvm/Object/ObjectFile.h" 29 #include "llvm/ProfileData/InstrProf.h" 30 #include "llvm/ProfileData/MemProf.h" 31 #include "llvm/ProfileData/MemProfData.inc" 32 #include "llvm/ProfileData/RawMemProfReader.h" 33 #include "llvm/Support/Endian.h" 34 #include "llvm/Support/Path.h" 35 36 #define DEBUG_TYPE "memprof" 37 38 namespace llvm { 39 namespace memprof { 40 namespace { 41 template <class T = uint64_t> inline T alignedRead(const char *Ptr) { 42 static_assert(std::is_pod<T>::value, "Not a pod type."); 43 assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read"); 44 return *reinterpret_cast<const T *>(Ptr); 45 } 46 47 Error checkBuffer(const MemoryBuffer &Buffer) { 48 if (!RawMemProfReader::hasFormat(Buffer)) 49 return make_error<InstrProfError>(instrprof_error::bad_magic); 50 51 if (Buffer.getBufferSize() == 0) 52 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 53 54 if (Buffer.getBufferSize() < sizeof(Header)) { 55 return make_error<InstrProfError>(instrprof_error::truncated); 56 } 57 58 // The size of the buffer can be > header total size since we allow repeated 59 // serialization of memprof profiles to the same file. 60 uint64_t TotalSize = 0; 61 const char *Next = Buffer.getBufferStart(); 62 while (Next < Buffer.getBufferEnd()) { 63 auto *H = reinterpret_cast<const Header *>(Next); 64 if (H->Version != MEMPROF_RAW_VERSION) { 65 return make_error<InstrProfError>(instrprof_error::unsupported_version); 66 } 67 68 TotalSize += H->TotalSize; 69 Next += H->TotalSize; 70 } 71 72 if (Buffer.getBufferSize() != TotalSize) { 73 return make_error<InstrProfError>(instrprof_error::malformed); 74 } 75 return Error::success(); 76 } 77 78 llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) { 79 using namespace support; 80 81 const uint64_t NumItemsToRead = 82 endian::readNext<uint64_t, little, unaligned>(Ptr); 83 llvm::SmallVector<SegmentEntry> Items; 84 for (uint64_t I = 0; I < NumItemsToRead; I++) { 85 Items.push_back(*reinterpret_cast<const SegmentEntry *>( 86 Ptr + I * sizeof(SegmentEntry))); 87 } 88 return Items; 89 } 90 91 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 92 readMemInfoBlocks(const char *Ptr) { 93 using namespace support; 94 95 const uint64_t NumItemsToRead = 96 endian::readNext<uint64_t, little, unaligned>(Ptr); 97 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items; 98 for (uint64_t I = 0; I < NumItemsToRead; I++) { 99 const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr); 100 const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr); 101 Items.push_back({Id, MIB}); 102 // Only increment by size of MIB since readNext implicitly increments. 103 Ptr += sizeof(MemInfoBlock); 104 } 105 return Items; 106 } 107 108 CallStackMap readStackInfo(const char *Ptr) { 109 using namespace support; 110 111 const uint64_t NumItemsToRead = 112 endian::readNext<uint64_t, little, unaligned>(Ptr); 113 CallStackMap Items; 114 115 for (uint64_t I = 0; I < NumItemsToRead; I++) { 116 const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr); 117 const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr); 118 119 SmallVector<uint64_t> CallStack; 120 for (uint64_t J = 0; J < NumPCs; J++) { 121 CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr)); 122 } 123 124 Items[StackId] = CallStack; 125 } 126 return Items; 127 } 128 129 // Merges the contents of stack information in \p From to \p To. Returns true if 130 // any stack ids observed previously map to a different set of program counter 131 // addresses. 132 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) { 133 for (const auto &IdStack : From) { 134 auto I = To.find(IdStack.first); 135 if (I == To.end()) { 136 To[IdStack.first] = IdStack.second; 137 } else { 138 // Check that the PCs are the same (in order). 139 if (IdStack.second != I->second) 140 return true; 141 } 142 } 143 return false; 144 } 145 146 Error report(Error E, const StringRef Context) { 147 return joinErrors(createStringError(inconvertibleErrorCode(), Context), 148 std::move(E)); 149 } 150 151 bool isRuntimePath(const StringRef Path) { 152 return StringRef(llvm::sys::path::convert_to_slash(Path)) 153 .contains("memprof/memprof_"); 154 } 155 156 std::string getBuildIdString(const SegmentEntry &Entry) { 157 constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t); 158 constexpr uint8_t Zeros[Size] = {0}; 159 // If the build id is unset print a helpful string instead of all zeros. 160 if (memcmp(Entry.BuildId, Zeros, Size) == 0) 161 return "<None>"; 162 163 std::string Str; 164 raw_string_ostream OS(Str); 165 for (size_t I = 0; I < Size; I++) { 166 OS << format_hex_no_prefix(Entry.BuildId[I], 2); 167 } 168 return OS.str(); 169 } 170 } // namespace 171 172 Expected<std::unique_ptr<RawMemProfReader>> 173 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary, 174 bool KeepName) { 175 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 176 if (std::error_code EC = BufferOr.getError()) 177 return report(errorCodeToError(EC), Path.getSingleStringRef()); 178 179 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 180 if (Error E = checkBuffer(*Buffer)) 181 return report(std::move(E), Path.getSingleStringRef()); 182 183 if (ProfiledBinary.empty()) 184 return report( 185 errorCodeToError(make_error_code(std::errc::invalid_argument)), 186 "Path to profiled binary is empty!"); 187 188 auto BinaryOr = llvm::object::createBinary(ProfiledBinary); 189 if (!BinaryOr) { 190 return report(BinaryOr.takeError(), ProfiledBinary); 191 } 192 193 // Use new here since constructor is private. 194 std::unique_ptr<RawMemProfReader> Reader( 195 new RawMemProfReader(std::move(BinaryOr.get()), KeepName)); 196 if (Error E = Reader->initialize(std::move(Buffer))) { 197 return std::move(E); 198 } 199 return std::move(Reader); 200 } 201 202 bool RawMemProfReader::hasFormat(const StringRef Path) { 203 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 204 if (!BufferOr) 205 return false; 206 207 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 208 return hasFormat(*Buffer); 209 } 210 211 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { 212 if (Buffer.getBufferSize() < sizeof(uint64_t)) 213 return false; 214 // Aligned read to sanity check that the buffer was allocated with at least 8b 215 // alignment. 216 const uint64_t Magic = alignedRead(Buffer.getBufferStart()); 217 return Magic == MEMPROF_RAW_MAGIC_64; 218 } 219 220 void RawMemProfReader::printYAML(raw_ostream &OS) { 221 uint64_t NumAllocFunctions = 0, NumMibInfo = 0; 222 for (const auto &KV : FunctionProfileData) { 223 const size_t NumAllocSites = KV.second.AllocSites.size(); 224 if (NumAllocSites > 0) { 225 NumAllocFunctions++; 226 NumMibInfo += NumAllocSites; 227 } 228 } 229 230 OS << "MemprofProfile:\n"; 231 OS << " Summary:\n"; 232 OS << " Version: " << MEMPROF_RAW_VERSION << "\n"; 233 OS << " NumSegments: " << SegmentInfo.size() << "\n"; 234 OS << " NumMibInfo: " << NumMibInfo << "\n"; 235 OS << " NumAllocFunctions: " << NumAllocFunctions << "\n"; 236 OS << " NumStackOffsets: " << StackMap.size() << "\n"; 237 // Print out the segment information. 238 OS << " Segments:\n"; 239 for (const auto &Entry : SegmentInfo) { 240 OS << " -\n"; 241 OS << " BuildId: " << getBuildIdString(Entry) << "\n"; 242 OS << " Start: 0x" << llvm::utohexstr(Entry.Start) << "\n"; 243 OS << " End: 0x" << llvm::utohexstr(Entry.End) << "\n"; 244 OS << " Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n"; 245 } 246 // Print out the merged contents of the profiles. 247 OS << " Records:\n"; 248 for (const auto &Entry : *this) { 249 OS << " -\n"; 250 OS << " FunctionGUID: " << Entry.first << "\n"; 251 Entry.second.print(OS); 252 } 253 } 254 255 Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) { 256 const StringRef FileName = Binary.getBinary()->getFileName(); 257 258 auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary()); 259 if (!ElfObject) { 260 return report(make_error<StringError>(Twine("Not an ELF file: "), 261 inconvertibleErrorCode()), 262 FileName); 263 } 264 265 // Check whether the profiled binary was built with position independent code 266 // (PIC). For now we provide a error message until symbolization support 267 // is added for pic. 268 auto* Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject); 269 const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile(); 270 auto PHdrsOr = ElfFile.program_headers(); 271 if(!PHdrsOr) 272 return report(make_error<StringError>(Twine("Could not read program headers: "), 273 inconvertibleErrorCode()), 274 FileName); 275 auto FirstLoadHeader = PHdrsOr->begin(); 276 while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD) 277 ++FirstLoadHeader; 278 if(FirstLoadHeader->p_vaddr == 0) 279 return report(make_error<StringError>(Twine("Unsupported position independent code"), 280 inconvertibleErrorCode()), 281 FileName); 282 283 auto Triple = ElfObject->makeTriple(); 284 if (!Triple.isX86()) 285 return report(make_error<StringError>(Twine("Unsupported target: ") + 286 Triple.getArchName(), 287 inconvertibleErrorCode()), 288 FileName); 289 290 auto *Object = cast<object::ObjectFile>(Binary.getBinary()); 291 std::unique_ptr<DIContext> Context = DWARFContext::create( 292 *Object, DWARFContext::ProcessDebugRelocations::Process); 293 294 auto SOFOr = symbolize::SymbolizableObjectFile::create( 295 Object, std::move(Context), /*UntagAddresses=*/false); 296 if (!SOFOr) 297 return report(SOFOr.takeError(), FileName); 298 Symbolizer = std::move(SOFOr.get()); 299 300 if (Error E = readRawProfile(std::move(DataBuffer))) 301 return E; 302 303 if (Error E = symbolizeAndFilterStackFrames()) 304 return E; 305 306 return mapRawProfileToRecords(); 307 } 308 309 Error RawMemProfReader::mapRawProfileToRecords() { 310 // Hold a mapping from function to each callsite location we encounter within 311 // it that is part of some dynamic allocation context. The location is stored 312 // as a pointer to a symbolized list of inline frames. 313 using LocationPtr = const llvm::SmallVector<FrameId> *; 314 llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>> 315 PerFunctionCallSites; 316 317 // Convert the raw profile callstack data into memprof records. While doing so 318 // keep track of related contexts so that we can fill these in later. 319 for (const auto &Entry : CallstackProfileData) { 320 const uint64_t StackId = Entry.first; 321 322 auto It = StackMap.find(StackId); 323 if (It == StackMap.end()) 324 return make_error<InstrProfError>( 325 instrprof_error::malformed, 326 "memprof callstack record does not contain id: " + Twine(StackId)); 327 328 // Construct the symbolized callstack. 329 llvm::SmallVector<FrameId> Callstack; 330 Callstack.reserve(It->getSecond().size()); 331 332 llvm::ArrayRef<uint64_t> Addresses = It->getSecond(); 333 for (size_t I = 0; I < Addresses.size(); I++) { 334 const uint64_t Address = Addresses[I]; 335 assert(SymbolizedFrame.count(Address) > 0 && 336 "Address not found in SymbolizedFrame map"); 337 const SmallVector<FrameId> &Frames = SymbolizedFrame[Address]; 338 339 assert(!idToFrame(Frames.back()).IsInlineFrame && 340 "The last frame should not be inlined"); 341 342 // Record the callsites for each function. Skip the first frame of the 343 // first address since it is the allocation site itself that is recorded 344 // as an alloc site. 345 for (size_t J = 0; J < Frames.size(); J++) { 346 if (I == 0 && J == 0) 347 continue; 348 // We attach the entire bottom-up frame here for the callsite even 349 // though we only need the frames up to and including the frame for 350 // Frames[J].Function. This will enable better deduplication for 351 // compression in the future. 352 const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function; 353 PerFunctionCallSites[Guid].insert(&Frames); 354 } 355 356 // Add all the frames to the current allocation callstack. 357 Callstack.append(Frames.begin(), Frames.end()); 358 } 359 360 // We attach the memprof record to each function bottom-up including the 361 // first non-inline frame. 362 for (size_t I = 0; /*Break out using the condition below*/; I++) { 363 const Frame &F = idToFrame(Callstack[I]); 364 auto Result = 365 FunctionProfileData.insert({F.Function, IndexedMemProfRecord()}); 366 IndexedMemProfRecord &Record = Result.first->second; 367 Record.AllocSites.emplace_back(Callstack, Entry.second); 368 369 if (!F.IsInlineFrame) 370 break; 371 } 372 } 373 374 // Fill in the related callsites per function. 375 for (const auto &[Id, Locs] : PerFunctionCallSites) { 376 // Some functions may have only callsite data and no allocation data. Here 377 // we insert a new entry for callsite data if we need to. 378 auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()}); 379 IndexedMemProfRecord &Record = Result.first->second; 380 for (LocationPtr Loc : Locs) { 381 Record.CallSites.push_back(*Loc); 382 } 383 } 384 385 return Error::success(); 386 } 387 388 Error RawMemProfReader::symbolizeAndFilterStackFrames() { 389 // The specifier to use when symbolization is requested. 390 const DILineInfoSpecifier Specifier( 391 DILineInfoSpecifier::FileLineInfoKind::RawValue, 392 DILineInfoSpecifier::FunctionNameKind::LinkageName); 393 394 // For entries where all PCs in the callstack are discarded, we erase the 395 // entry from the stack map. 396 llvm::SmallVector<uint64_t> EntriesToErase; 397 // We keep track of all prior discarded entries so that we can avoid invoking 398 // the symbolizer for such entries. 399 llvm::DenseSet<uint64_t> AllVAddrsToDiscard; 400 for (auto &Entry : StackMap) { 401 for (const uint64_t VAddr : Entry.getSecond()) { 402 // Check if we have already symbolized and cached the result or if we 403 // don't want to attempt symbolization since we know this address is bad. 404 // In this case the address is also removed from the current callstack. 405 if (SymbolizedFrame.count(VAddr) > 0 || 406 AllVAddrsToDiscard.contains(VAddr)) 407 continue; 408 409 Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode( 410 getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false); 411 if (!DIOr) 412 return DIOr.takeError(); 413 DIInliningInfo DI = DIOr.get(); 414 415 // Drop frames which we can't symbolize or if they belong to the runtime. 416 if (DI.getFrame(0).FunctionName == DILineInfo::BadString || 417 isRuntimePath(DI.getFrame(0).FileName)) { 418 AllVAddrsToDiscard.insert(VAddr); 419 continue; 420 } 421 422 for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames; 423 I++) { 424 const auto &DIFrame = DI.getFrame(I); 425 const uint64_t Guid = 426 IndexedMemProfRecord::getGUID(DIFrame.FunctionName); 427 const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, 428 // Only the last entry is not an inlined location. 429 I != NumFrames - 1); 430 // Here we retain a mapping from the GUID to symbol name instead of 431 // adding it to the frame object directly to reduce memory overhead. 432 // This is because there can be many unique frames, particularly for 433 // callsite frames. 434 if (KeepSymbolName) 435 GuidToSymbolName.insert({Guid, DIFrame.FunctionName}); 436 437 const FrameId Hash = F.hash(); 438 IdToFrame.insert({Hash, F}); 439 SymbolizedFrame[VAddr].push_back(Hash); 440 } 441 } 442 443 auto &CallStack = Entry.getSecond(); 444 llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) { 445 return AllVAddrsToDiscard.contains(A); 446 }); 447 if (CallStack.empty()) 448 EntriesToErase.push_back(Entry.getFirst()); 449 } 450 451 // Drop the entries where the callstack is empty. 452 for (const uint64_t Id : EntriesToErase) { 453 StackMap.erase(Id); 454 CallstackProfileData.erase(Id); 455 } 456 457 if (StackMap.empty()) 458 return make_error<InstrProfError>( 459 instrprof_error::malformed, 460 "no entries in callstack map after symbolization"); 461 462 return Error::success(); 463 } 464 465 Error RawMemProfReader::readRawProfile( 466 std::unique_ptr<MemoryBuffer> DataBuffer) { 467 const char *Next = DataBuffer->getBufferStart(); 468 469 while (Next < DataBuffer->getBufferEnd()) { 470 auto *Header = reinterpret_cast<const memprof::Header *>(Next); 471 472 // Read in the segment information, check whether its the same across all 473 // profiles in this binary file. 474 const llvm::SmallVector<SegmentEntry> Entries = 475 readSegmentEntries(Next + Header->SegmentOffset); 476 if (!SegmentInfo.empty() && SegmentInfo != Entries) { 477 // We do not expect segment information to change when deserializing from 478 // the same binary profile file. This can happen if dynamic libraries are 479 // loaded/unloaded between profile dumping. 480 return make_error<InstrProfError>( 481 instrprof_error::malformed, 482 "memprof raw profile has different segment information"); 483 } 484 SegmentInfo.assign(Entries.begin(), Entries.end()); 485 486 // Read in the MemInfoBlocks. Merge them based on stack id - we assume that 487 // raw profiles in the same binary file are from the same process so the 488 // stackdepot ids are the same. 489 for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) { 490 if (CallstackProfileData.count(Value.first)) { 491 CallstackProfileData[Value.first].Merge(Value.second); 492 } else { 493 CallstackProfileData[Value.first] = Value.second; 494 } 495 } 496 497 // Read in the callstack for each ids. For multiple raw profiles in the same 498 // file, we expect that the callstack is the same for a unique id. 499 const CallStackMap CSM = readStackInfo(Next + Header->StackOffset); 500 if (StackMap.empty()) { 501 StackMap = CSM; 502 } else { 503 if (mergeStackMap(CSM, StackMap)) 504 return make_error<InstrProfError>( 505 instrprof_error::malformed, 506 "memprof raw profile got different call stack for same id"); 507 } 508 509 Next += Header->TotalSize; 510 } 511 512 return Error::success(); 513 } 514 515 object::SectionedAddress 516 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) { 517 LLVM_DEBUG({ 518 SegmentEntry *ContainingSegment = nullptr; 519 for (auto &SE : SegmentInfo) { 520 if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) { 521 ContainingSegment = &SE; 522 } 523 } 524 525 // Ensure that the virtual address is valid. 526 assert(ContainingSegment && "Could not find a segment entry"); 527 }); 528 529 // TODO: Compute the file offset based on the maps and program headers. For 530 // now this only works for non PIE binaries. 531 return object::SectionedAddress{VirtualAddress}; 532 } 533 534 Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) { 535 if (FunctionProfileData.empty()) 536 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 537 538 if (Iter == FunctionProfileData.end()) 539 return make_error<InstrProfError>(instrprof_error::eof); 540 541 auto IdToFrameCallback = [this](const FrameId Id) { 542 Frame F = this->idToFrame(Id); 543 if (!this->KeepSymbolName) 544 return F; 545 auto Iter = this->GuidToSymbolName.find(F.Function); 546 assert(Iter != this->GuidToSymbolName.end()); 547 F.SymbolName = Iter->getSecond(); 548 return F; 549 }; 550 551 const IndexedMemProfRecord &IndexedRecord = Iter->second; 552 GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)}; 553 Iter++; 554 return Error::success(); 555 } 556 } // namespace memprof 557 } // namespace llvm 558