1 #ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ 2 #define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ 3 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// 4 // 5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6 // See https://llvm.org/LICENSE.txt for license information. 7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8 // 9 //===----------------------------------------------------------------------===// 10 // 11 // This file contains support for reading MemProf profiling data. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/MapVector.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 19 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 20 #include "llvm/IR/GlobalValue.h" 21 #include "llvm/Object/Binary.h" 22 #include "llvm/Object/ObjectFile.h" 23 #include "llvm/ProfileData/InstrProfReader.h" 24 #include "llvm/ProfileData/MemProf.h" 25 #include "llvm/ProfileData/MemProfData.inc" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 29 #include <functional> 30 31 namespace llvm { 32 namespace memprof { 33 // A class for memprof profile data populated directly from external 34 // sources. 35 // TODO: Rename this file to MemProfReader.h to better reflect the contents. 36 class MemProfReader { 37 public: 38 // The MemProfReader only holds memory profile information. 39 InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } 40 41 using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>; 42 using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>; 43 Iterator end() { return Iterator(); } 44 Iterator begin() { 45 Iter = FunctionProfileData.begin(); 46 return Iterator(this); 47 } 48 49 // Return a const reference to the internal Id to Frame mappings. 50 const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const { 51 return IdToFrame; 52 } 53 54 // Return a const reference to the internal function profile data. 55 const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> & 56 getProfileData() const { 57 return FunctionProfileData; 58 } 59 60 virtual Error 61 readNextRecord(GuidMemProfRecordPair &GuidRecord, 62 std::function<const Frame(const FrameId)> Callback = nullptr) { 63 if (FunctionProfileData.empty()) 64 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 65 66 if (Iter == FunctionProfileData.end()) 67 return make_error<InstrProfError>(instrprof_error::eof); 68 69 if (Callback == nullptr) 70 Callback = 71 std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1); 72 73 const IndexedMemProfRecord &IndexedRecord = Iter->second; 74 GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)}; 75 Iter++; 76 return Error::success(); 77 } 78 79 // Allow default construction for derived classes which can populate the 80 // contents after construction. 81 MemProfReader() = default; 82 virtual ~MemProfReader() = default; 83 84 // Initialize the MemProfReader with the frame mappings and profile contents. 85 MemProfReader( 86 llvm::DenseMap<FrameId, Frame> FrameIdMap, 87 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData) 88 : IdToFrame(std::move(FrameIdMap)), 89 FunctionProfileData(std::move(ProfData)) {} 90 91 protected: 92 // A helper method to extract the frame from the IdToFrame map. 93 const Frame &idToFrame(const FrameId Id) const { 94 auto It = IdToFrame.find(Id); 95 assert(It != IdToFrame.end() && "Id not found in map."); 96 return It->getSecond(); 97 } 98 // A mapping from FrameId (a hash of the contents) to the frame. 99 llvm::DenseMap<FrameId, Frame> IdToFrame; 100 // A mapping from function GUID, hash of the canonical function symbol to the 101 // memprof profile data for that function, i.e allocation and callsite info. 102 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData; 103 // An iterator to the internal function profile data structure. 104 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter; 105 }; 106 107 // Map from id (recorded from sanitizer stack depot) to virtual addresses for 108 // each program counter address in the callstack. 109 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>; 110 111 // Specializes the MemProfReader class to populate the contents from raw binary 112 // memprof profiles from instrumentation based profiling. 113 class RawMemProfReader final : public MemProfReader { 114 public: 115 RawMemProfReader(const RawMemProfReader &) = delete; 116 RawMemProfReader &operator=(const RawMemProfReader &) = delete; 117 virtual ~RawMemProfReader() override = default; 118 119 // Prints the contents of the profile in YAML format. 120 void printYAML(raw_ostream &OS); 121 122 // Return true if the \p DataBuffer starts with magic bytes indicating it is 123 // a raw binary memprof profile. 124 static bool hasFormat(const MemoryBuffer &DataBuffer); 125 // Return true if the file at \p Path starts with magic bytes indicating it is 126 // a raw binary memprof profile. 127 static bool hasFormat(const StringRef Path); 128 129 // Create a RawMemProfReader after sanity checking the contents of the file at 130 // \p Path or the \p Buffer. The binary from which the profile has been 131 // collected is specified via a path in \p ProfiledBinary. 132 static Expected<std::unique_ptr<RawMemProfReader>> 133 create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false); 134 static Expected<std::unique_ptr<RawMemProfReader>> 135 create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary, 136 bool KeepName = false); 137 138 // Returns a list of build ids recorded in the segment information. 139 static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer); 140 141 virtual Error 142 readNextRecord(GuidMemProfRecordPair &GuidRecord, 143 std::function<const Frame(const FrameId)> Callback) override; 144 145 // Constructor for unittests only. 146 RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym, 147 llvm::SmallVectorImpl<SegmentEntry> &Seg, 148 llvm::MapVector<uint64_t, MemInfoBlock> &Prof, 149 CallStackMap &SM, bool KeepName = false) 150 : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()), 151 CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) { 152 // We don't call initialize here since there is no raw profile to read. The 153 // test should pass in the raw profile as structured data. 154 155 // If there is an error here then the mock symbolizer has not been 156 // initialized properly. 157 if (Error E = symbolizeAndFilterStackFrames()) 158 report_fatal_error(std::move(E)); 159 if (Error E = mapRawProfileToRecords()) 160 report_fatal_error(std::move(E)); 161 } 162 163 private: 164 RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName) 165 : Binary(std::move(Bin)), KeepSymbolName(KeepName) {} 166 // Initializes the RawMemProfReader with the contents in `DataBuffer`. 167 Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer); 168 // Read and parse the contents of the `DataBuffer` as a binary format profile. 169 Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer); 170 // Initialize the segment mapping information for symbolization. 171 Error setupForSymbolization(); 172 // Symbolize and cache all the virtual addresses we encounter in the 173 // callstacks from the raw profile. Also prune callstack frames which we can't 174 // symbolize or those that belong to the runtime. For profile entries where 175 // the entire callstack is pruned, we drop the entry from the profile. 176 Error symbolizeAndFilterStackFrames(); 177 // Construct memprof records for each function and store it in the 178 // `FunctionProfileData` map. A function may have allocation profile data or 179 // callsite data or both. 180 Error mapRawProfileToRecords(); 181 182 object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); 183 184 // The profiled binary. 185 object::OwningBinary<object::Binary> Binary; 186 // A symbolizer to translate virtual addresses to code locations. 187 std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer; 188 // The preferred load address of the executable segment. 189 uint64_t PreferredTextSegmentAddress = 0; 190 // The base address of the text segment in the process during profiling. 191 uint64_t ProfiledTextSegmentStart = 0; 192 // The limit address of the text segment in the process during profiling. 193 uint64_t ProfiledTextSegmentEnd = 0; 194 195 // The memory mapped segment information for all executable segments in the 196 // profiled binary (filtered from the raw profile using the build id). 197 llvm::SmallVector<SegmentEntry, 2> SegmentInfo; 198 199 // A map from callstack id (same as key in CallStackMap below) to the heap 200 // information recorded for that allocation context. 201 llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData; 202 CallStackMap StackMap; 203 204 // Cached symbolization from PC to Frame. 205 llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame; 206 207 // Whether to keep the symbol name for each frame after hashing. 208 bool KeepSymbolName = false; 209 // A mapping of the hash to symbol name, only used if KeepSymbolName is true. 210 llvm::DenseMap<uint64_t, std::string> GuidToSymbolName; 211 }; 212 } // namespace memprof 213 } // namespace llvm 214 215 #endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ 216