1 #ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
2 #define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
3 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===//
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 // This file contains support for reading MemProf profiling data.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
19 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/Object/Binary.h"
22 #include "llvm/Object/ObjectFile.h"
23 #include "llvm/ProfileData/InstrProfReader.h"
24 #include "llvm/ProfileData/MemProf.h"
25 #include "llvm/ProfileData/MemProfData.inc"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 
29 #include <functional>
30 
31 namespace llvm {
32 namespace memprof {
33 // A class for memprof profile data populated directly from external
34 // sources.
35 // TODO: Rename this file to MemProfReader.h to better reflect the contents.
36 class MemProfReader {
37 public:
38   // The MemProfReader only holds memory profile information.
39   InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
40 
41   using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
42   using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>;
43   Iterator end() { return Iterator(); }
44   Iterator begin() {
45     Iter = FunctionProfileData.begin();
46     return Iterator(this);
47   }
48 
49   // Return a const reference to the internal Id to Frame mappings.
50   const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const {
51     return IdToFrame;
52   }
53 
54   // Return a const reference to the internal function profile data.
55   const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
56   getProfileData() const {
57     return FunctionProfileData;
58   }
59 
60   virtual Error
61   readNextRecord(GuidMemProfRecordPair &GuidRecord,
62                  std::function<const Frame(const FrameId)> Callback = nullptr) {
63     if (FunctionProfileData.empty())
64       return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
65 
66     if (Iter == FunctionProfileData.end())
67       return make_error<InstrProfError>(instrprof_error::eof);
68 
69     if (Callback == nullptr)
70       Callback =
71           std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
72 
73     const IndexedMemProfRecord &IndexedRecord = Iter->second;
74     GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)};
75     Iter++;
76     return Error::success();
77   }
78 
79   // Allow default construction for derived classes which can populate the
80   // contents after construction.
81   MemProfReader() = default;
82   virtual ~MemProfReader() = default;
83 
84   // Initialize the MemProfReader with the frame mappings and profile contents.
85   MemProfReader(
86       llvm::DenseMap<FrameId, Frame> FrameIdMap,
87       llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
88       : IdToFrame(std::move(FrameIdMap)),
89         FunctionProfileData(std::move(ProfData)) {}
90 
91 protected:
92   // A helper method to extract the frame from the IdToFrame map.
93   const Frame &idToFrame(const FrameId Id) const {
94     auto It = IdToFrame.find(Id);
95     assert(It != IdToFrame.end() && "Id not found in map.");
96     return It->getSecond();
97   }
98   // A mapping from FrameId (a hash of the contents) to the frame.
99   llvm::DenseMap<FrameId, Frame> IdToFrame;
100   // A mapping from function GUID, hash of the canonical function symbol to the
101   // memprof profile data for that function, i.e allocation and callsite info.
102   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
103   // An iterator to the internal function profile data structure.
104   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
105 };
106 
107 // Map from id (recorded from sanitizer stack depot) to virtual addresses for
108 // each program counter address in the callstack.
109 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
110 
111 // Specializes the MemProfReader class to populate the contents from raw binary
112 // memprof profiles from instrumentation based profiling.
113 class RawMemProfReader final : public MemProfReader {
114 public:
115   RawMemProfReader(const RawMemProfReader &) = delete;
116   RawMemProfReader &operator=(const RawMemProfReader &) = delete;
117   virtual ~RawMemProfReader() override = default;
118 
119   // Prints the contents of the profile in YAML format.
120   void printYAML(raw_ostream &OS);
121 
122   // Return true if the \p DataBuffer starts with magic bytes indicating it is
123   // a raw binary memprof profile.
124   static bool hasFormat(const MemoryBuffer &DataBuffer);
125   // Return true if the file at \p Path starts with magic bytes indicating it is
126   // a raw binary memprof profile.
127   static bool hasFormat(const StringRef Path);
128 
129   // Create a RawMemProfReader after sanity checking the contents of the file at
130   // \p Path or the \p Buffer. The binary from which the profile has been
131   // collected is specified via a path in \p ProfiledBinary.
132   static Expected<std::unique_ptr<RawMemProfReader>>
133   create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
134   static Expected<std::unique_ptr<RawMemProfReader>>
135   create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
136          bool KeepName = false);
137 
138   // Returns a list of build ids recorded in the segment information.
139   static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
140 
141   virtual Error
142   readNextRecord(GuidMemProfRecordPair &GuidRecord,
143                  std::function<const Frame(const FrameId)> Callback) override;
144 
145   // Constructor for unittests only.
146   RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
147                    llvm::SmallVectorImpl<SegmentEntry> &Seg,
148                    llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
149                    CallStackMap &SM, bool KeepName = false)
150       : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
151         CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) {
152     // We don't call initialize here since there is no raw profile to read. The
153     // test should pass in the raw profile as structured data.
154 
155     // If there is an error here then the mock symbolizer has not been
156     // initialized properly.
157     if (Error E = symbolizeAndFilterStackFrames())
158       report_fatal_error(std::move(E));
159     if (Error E = mapRawProfileToRecords())
160       report_fatal_error(std::move(E));
161   }
162 
163 private:
164   RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
165       : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
166   // Initializes the RawMemProfReader with the contents in `DataBuffer`.
167   Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
168   // Read and parse the contents of the `DataBuffer` as a binary format profile.
169   Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
170   // Initialize the segment mapping information for symbolization.
171   Error setupForSymbolization();
172   // Symbolize and cache all the virtual addresses we encounter in the
173   // callstacks from the raw profile. Also prune callstack frames which we can't
174   // symbolize or those that belong to the runtime. For profile entries where
175   // the entire callstack is pruned, we drop the entry from the profile.
176   Error symbolizeAndFilterStackFrames();
177   // Construct memprof records for each function and store it in the
178   // `FunctionProfileData` map. A function may have allocation profile data or
179   // callsite data or both.
180   Error mapRawProfileToRecords();
181 
182   object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
183 
184   // The profiled binary.
185   object::OwningBinary<object::Binary> Binary;
186   // A symbolizer to translate virtual addresses to code locations.
187   std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer;
188   // The preferred load address of the executable segment.
189   uint64_t PreferredTextSegmentAddress = 0;
190   // The base address of the text segment in the process during profiling.
191   uint64_t ProfiledTextSegmentStart = 0;
192   // The limit address of the text segment in the process during profiling.
193   uint64_t ProfiledTextSegmentEnd = 0;
194 
195   // The memory mapped segment information for all executable segments in the
196   // profiled binary (filtered from the raw profile using the build id).
197   llvm::SmallVector<SegmentEntry, 2> SegmentInfo;
198 
199   // A map from callstack id (same as key in CallStackMap below) to the heap
200   // information recorded for that allocation context.
201   llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
202   CallStackMap StackMap;
203 
204   // Cached symbolization from PC to Frame.
205   llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
206 
207   // Whether to keep the symbol name for each frame after hashing.
208   bool KeepSymbolName = false;
209   // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
210   llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
211 };
212 } // namespace memprof
213 } // namespace llvm
214 
215 #endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
216