1 #ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
2 #define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
3 //===- RawMemProfReader.h - Instrumented memory profiling reader -*- C++ -*-===//
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 // This file contains support for reading MemProf profiling data.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
19 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/Object/Binary.h"
22 #include "llvm/Object/ObjectFile.h"
23 #include "llvm/ProfileData/InstrProfReader.h"
24 #include "llvm/ProfileData/MemProf.h"
25 #include "llvm/ProfileData/MemProfData.inc"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 
29 #include <cstddef>
30 
31 namespace llvm {
32 namespace memprof {
33 
34 // Map from id (recorded from sanitizer stack depot) to virtual addresses for
35 // each program counter address in the callstack.
// Each mapped value is a vector of 64-bit addresses belonging to the callstack
// identified by the 64-bit key. RawMemProfReader stores a map of this type in
// its StackMap member.
36 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
37 
// Reader for raw MemProf profiles. It exposes the profile as per-function
// records, either in bulk through getProfileData() or one record at a time
// through the begin()/end() iteration interface.
//
// The constructor that takes the profiled binary is private, so readers backed
// by a profile buffer are obtained through the static create() overloads. The
// public constructor further below is intended for unit tests only.
38 class RawMemProfReader {
39 public:
  // The reader is not copyable.
40   RawMemProfReader(const RawMemProfReader &) = delete;
41   RawMemProfReader &operator=(const RawMemProfReader &) = delete;
42 
43   // Prints the contents of the profile in YAML format.
44   void printYAML(raw_ostream &OS);
45 
46   // Return true if the \p DataBuffer starts with magic bytes indicating it is
47   // a raw binary memprof profile.
48   static bool hasFormat(const MemoryBuffer &DataBuffer);
49   // Return true if the file at \p Path starts with magic bytes indicating it is
50   // a raw binary memprof profile.
51   static bool hasFormat(const StringRef Path);
52 
53   // Create a RawMemProfReader after sanity checking the contents of the file at
54   // \p Path or the \p Buffer. The binary from which the profile has been
55   // collected is specified via a path in \p ProfiledBinary.
  // The result is either an owning pointer to the new reader or an Error.
  // NOTE(review): \p KeepName presumably ends up in KeepSymbolName (see the
  // private constructor); the definitions are not part of this header.
56   static Expected<std::unique_ptr<RawMemProfReader>>
57   create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
58   static Expected<std::unique_ptr<RawMemProfReader>>
59   create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
60          bool KeepName = false);
61 
62   // Returns a list of build ids recorded in the segment information.
63   static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
64 
  // Iteration interface. Records are handed out as (GUID, MemProfRecord) pairs.
  // begin() rewinds the reader's internal cursor `Iter` to the first entry of
  // FunctionProfileData and returns an iterator bound to this reader; end()
  // returns a default-constructed iterator. The cursor lives in the reader
  // itself, so begin() is non-const and calling it again restarts iteration.
65   using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
66   using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>;
67   Iterator end() { return Iterator(); }
68   Iterator begin() {
69     Iter = FunctionProfileData.begin();
70     return Iterator(this);
71   }
72 
  // Produces a record into \p GuidRecord and reports failure through the
  // returned Error.
  // NOTE(review): defined out of line; presumably it consumes the entry at
  // `Iter`, advances the cursor, and is the hook used by Iterator. Confirm
  // against the implementation.
73   Error readNextRecord(GuidMemProfRecordPair &GuidRecord);
74 
75   // The RawMemProfReader only holds memory profile information.
76   InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
77 
78   // Constructor for unittests only.
  // Takes ownership of the symbolizer \p Sym. The entries of \p Seg are copied
  // into SegmentInfo, and \p Prof and \p SM are copied into
  // CallstackProfileData and StackMap respectively. Binary is not mentioned in
  // the initializer list and is therefore default-constructed.
  // Any Error returned by symbolizeAndFilterStackFrames() or
  // mapRawProfileToRecords() is escalated through report_fatal_error().
79   RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
80                    llvm::SmallVectorImpl<SegmentEntry> &Seg,
81                    llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
82                    CallStackMap &SM, bool KeepName = false)
83       : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
84         CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) {
85     // We don't call initialize here since there is no raw profile to read. The
86     // test should pass in the raw profile as structured data.
87 
88     // If there is an error here then the mock symbolizer has not been
89     // initialized properly.
90     if (Error E = symbolizeAndFilterStackFrames())
91       report_fatal_error(std::move(E));
92     if (Error E = mapRawProfileToRecords())
93       report_fatal_error(std::move(E));
94   }
95 
96   // Return a const reference to the internal Id to Frame mappings.
97   const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const {
98     return IdToFrame;
99   }
100 
101   // Return a const reference to the internal function profile data.
102   const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
103   getProfileData() const {
104     return FunctionProfileData;
105   }
106 
107 private:
  // Takes ownership of the profiled binary \p Bin by moving it into Binary and
  // records \p KeepName in KeepSymbolName. No profile data is read here.
  // NOTE(review): presumably the create() factories call this and then
  // initialize(); confirm in the implementation.
108   RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
109       : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
110   // Initializes the RawMemProfReader with the contents in `DataBuffer`.
111   Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
112   // Read and parse the contents of the `DataBuffer` as a binary format profile.
113   Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
114   // Initialize the segment mapping information for symbolization.
115   Error setupForSymbolization();
116   // Symbolize and cache all the virtual addresses we encounter in the
117   // callstacks from the raw profile. Also prune callstack frames which we can't
118   // symbolize or those that belong to the runtime. For profile entries where
119   // the entire callstack is pruned, we drop the entry from the profile.
120   Error symbolizeAndFilterStackFrames();
121   // Construct memprof records for each function and store it in the
122   // `FunctionProfileData` map. A function may have allocation profile data or
123   // callsite data or both.
124   Error mapRawProfileToRecords();
125 
126   // A helper method to extract the frame from the IdToFrame map.
  // A missing \p Id is only caught by the assert below. When assertions are
  // compiled out the end iterator would be dereferenced, so callers must pass
  // an Id that is present in IdToFrame.
127   const Frame &idToFrame(const FrameId Id) const {
128     auto It = IdToFrame.find(Id);
129     assert(It != IdToFrame.end() && "Id not found in map.");
130     return It->getSecond();
131   }
132 
  // NOTE(review): not documented in this header and defined out of line.
  // Presumably translates a \p VirtualAddress observed during profiling into a
  // section-relative address within the profiled binary, using the load
  // address fields below. Confirm against the definition.
133   object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
134 
135   // The profiled binary.
136   object::OwningBinary<object::Binary> Binary;
137   // A symbolizer to translate virtual addresses to code locations.
138   std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer;
139   // The preferred load address of the executable segment.
140   uint64_t PreferredTextSegmentAddress = 0;
141   // The base address of the text segment in the process during profiling.
142   uint64_t ProfiledTextSegmentStart = 0;
143   // The limit address of the text segment in the process during profiling.
144   uint64_t ProfiledTextSegmentEnd = 0;
145 
146   // The memory mapped segment information for all executable segments in the
147   // profiled binary (filtered from the raw profile using the build id).
148   llvm::SmallVector<SegmentEntry, 2> SegmentInfo;
149 
150   // A map from callstack id (same as key in CallStackMap below) to the heap
151   // information recorded for that allocation context.
152   llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
  // Callstack id to the list of addresses making up that callstack.
153   CallStackMap StackMap;
154 
155   // Cached symbolization from PC to Frame.
156   llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
  // Frame id to Frame lookup; queried by idToFrame() and exposed read-only
  // through getFrameMapping().
157   llvm::DenseMap<FrameId, Frame> IdToFrame;
158 
  // Per-function records keyed by GUID (filled by mapRawProfileToRecords(), per
  // its comment above); exposed read-only through getProfileData().
159   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
  // Iteration cursor into FunctionProfileData; reset by begin().
160   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
161 
162   // Whether to keep the symbol name for each frame after hashing.
163   bool KeepSymbolName = false;
164   // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
165   llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
166 };
167 } // namespace memprof
168 } // namespace llvm
169 
170 #endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
171