1 #ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ 2 #define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ 3 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// 4 // 5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6 // See https://llvm.org/LICENSE.txt for license information. 7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8 // 9 //===----------------------------------------------------------------------===// 10 // 11 // This file contains support for reading MemProf profiling data. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/MapVector.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 19 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 20 #include "llvm/IR/GlobalValue.h" 21 #include "llvm/Object/Binary.h" 22 #include "llvm/Object/ObjectFile.h" 23 #include "llvm/ProfileData/InstrProfReader.h" 24 #include "llvm/ProfileData/MemProf.h" 25 #include "llvm/ProfileData/MemProfData.inc" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 29 #include <cstddef> 30 31 namespace llvm { 32 namespace memprof { 33 34 // Map from id (recorded from sanitizer stack depot) to virtual addresses for 35 // each program counter address in the callstack. 36 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>; 37 38 class RawMemProfReader { 39 public: 40 RawMemProfReader(const RawMemProfReader &) = delete; 41 RawMemProfReader &operator=(const RawMemProfReader &) = delete; 42 43 // Prints the contents of the profile in YAML format. 44 void printYAML(raw_ostream &OS); 45 46 // Return true if the \p DataBuffer starts with magic bytes indicating it is 47 // a raw binary memprof profile. 48 static bool hasFormat(const MemoryBuffer &DataBuffer); 49 // Return true if the file at \p Path starts with magic bytes indicating it is 50 // a raw binary memprof profile. 51 static bool hasFormat(const StringRef Path); 52 53 // Create a RawMemProfReader after sanity checking the contents of the file at 54 // \p Path or the \p Buffer. The binary from which the profile has been 55 // collected is specified via a path in \p ProfiledBinary. 56 static Expected<std::unique_ptr<RawMemProfReader>> 57 create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false); 58 static Expected<std::unique_ptr<RawMemProfReader>> 59 create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary, 60 bool KeepName = false); 61 62 // Returns a list of build ids recorded in the segment information. 63 static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer); 64 65 using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>; 66 using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>; 67 Iterator end() { return Iterator(); } 68 Iterator begin() { 69 Iter = FunctionProfileData.begin(); 70 return Iterator(this); 71 } 72 73 Error readNextRecord(GuidMemProfRecordPair &GuidRecord); 74 75 // The RawMemProfReader only holds memory profile information. 76 InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } 77 78 // Constructor for unittests only. 79 RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym, 80 llvm::SmallVectorImpl<SegmentEntry> &Seg, 81 llvm::MapVector<uint64_t, MemInfoBlock> &Prof, 82 CallStackMap &SM, bool KeepName = false) 83 : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()), 84 CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) { 85 // We don't call initialize here since there is no raw profile to read. The 86 // test should pass in the raw profile as structured data. 87 88 // If there is an error here then the mock symbolizer has not been 89 // initialized properly. 90 if (Error E = symbolizeAndFilterStackFrames()) 91 report_fatal_error(std::move(E)); 92 if (Error E = mapRawProfileToRecords()) 93 report_fatal_error(std::move(E)); 94 } 95 96 // Return a const reference to the internal Id to Frame mappings. 97 const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const { 98 return IdToFrame; 99 } 100 101 // Return a const reference to the internal function profile data. 102 const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> & 103 getProfileData() const { 104 return FunctionProfileData; 105 } 106 107 private: 108 RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName) 109 : Binary(std::move(Bin)), KeepSymbolName(KeepName) {} 110 // Initializes the RawMemProfReader with the contents in `DataBuffer`. 111 Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer); 112 // Read and parse the contents of the `DataBuffer` as a binary format profile. 113 Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer); 114 // Initialize the segment mapping information for symbolization. 115 Error setupForSymbolization(); 116 // Symbolize and cache all the virtual addresses we encounter in the 117 // callstacks from the raw profile. Also prune callstack frames which we can't 118 // symbolize or those that belong to the runtime. For profile entries where 119 // the entire callstack is pruned, we drop the entry from the profile. 120 Error symbolizeAndFilterStackFrames(); 121 // Construct memprof records for each function and store it in the 122 // `FunctionProfileData` map. A function may have allocation profile data or 123 // callsite data or both. 124 Error mapRawProfileToRecords(); 125 126 // A helper method to extract the frame from the IdToFrame map. 127 const Frame &idToFrame(const FrameId Id) const { 128 auto It = IdToFrame.find(Id); 129 assert(It != IdToFrame.end() && "Id not found in map."); 130 return It->getSecond(); 131 } 132 133 object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); 134 135 // The profiled binary. 136 object::OwningBinary<object::Binary> Binary; 137 // A symbolizer to translate virtual addresses to code locations. 138 std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer; 139 // The preferred load address of the executable segment. 140 uint64_t PreferredTextSegmentAddress = 0; 141 // The base address of the text segment in the process during profiling. 142 uint64_t ProfiledTextSegmentStart = 0; 143 // The limit address of the text segment in the process during profiling. 144 uint64_t ProfiledTextSegmentEnd = 0; 145 146 // The memory mapped segment information for all executable segments in the 147 // profiled binary (filtered from the raw profile using the build id). 148 llvm::SmallVector<SegmentEntry, 2> SegmentInfo; 149 150 // A map from callstack id (same as key in CallStackMap below) to the heap 151 // information recorded for that allocation context. 152 llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData; 153 CallStackMap StackMap; 154 155 // Cached symbolization from PC to Frame. 156 llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame; 157 llvm::DenseMap<FrameId, Frame> IdToFrame; 158 159 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData; 160 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter; 161 162 // Whether to keep the symbol name for each frame after hashing. 163 bool KeepSymbolName = false; 164 // A mapping of the hash to symbol name, only used if KeepSymbolName is true. 165 llvm::DenseMap<uint64_t, std::string> GuidToSymbolName; 166 }; 167 } // namespace memprof 168 } // namespace llvm 169 170 #endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ 171