1 #ifndef LLVM_PROFILEDATA_MEMPROF_H_
2 #define LLVM_PROFILEDATA_MEMPROF_H_
3 
4 #include "llvm/ADT/STLFunctionalExtras.h"
5 #include "llvm/ADT/SmallVector.h"
6 #include "llvm/IR/GlobalValue.h"
7 #include "llvm/ProfileData/MemProfData.inc"
8 #include "llvm/Support/Endian.h"
9 #include "llvm/Support/EndianStream.h"
10 #include "llvm/Support/raw_ostream.h"
11 
12 #include <cstdint>
13 #include <optional>
14 
15 namespace llvm {
16 namespace memprof {
17 
// Identifiers for the individual fields of a MemInfoBlock. One enumerator is
// generated for every MIBEntryDef entry in MIBEntryDef.inc; the generated
// entries are bracketed by the sentinels Start (fixed at 0) and Size (declared
// after the last generated entry).
enum class Meta : uint64_t {
  Start = 0,
// Each entry contributes its NameTag to the enumerator list.
#define MIBEntryDef(NameTag, Name, Type) NameTag,
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  Size
};

// An ordered list of field identifiers. PortableMemInfoBlock::serialize and
// PortableMemInfoBlock::deserialize walk it to decide which fields to process
// and in which order. The inline capacity is the numeric value of Meta::Size.
using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>;
27 
28 // Holds the actual MemInfoBlock data with all fields. Contents may be read or
29 // written partially by providing an appropriate schema to the serialize and
30 // deserialize methods.
31 struct PortableMemInfoBlock {
32   PortableMemInfoBlock() = default;
33   explicit PortableMemInfoBlock(const MemInfoBlock &Block) {
34 #define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
35 #include "llvm/ProfileData/MIBEntryDef.inc"
36 #undef MIBEntryDef
37   }
38 
39   PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
40     deserialize(Schema, Ptr);
41   }
42 
43   // Read the contents of \p Ptr based on the \p Schema to populate the
44   // MemInfoBlock member.
45   void deserialize(const MemProfSchema &Schema, const unsigned char *Ptr) {
46     using namespace support;
47 
48     for (const Meta Id : Schema) {
49       switch (Id) {
50 #define MIBEntryDef(NameTag, Name, Type)                                       \
51   case Meta::Name: {                                                           \
52     Name = endian::readNext<Type, llvm::endianness::little, unaligned>(Ptr);   \
53   } break;
54 #include "llvm/ProfileData/MIBEntryDef.inc"
55 #undef MIBEntryDef
56       default:
57         llvm_unreachable("Unknown meta type id, is the profile collected from "
58                          "a newer version of the runtime?");
59       }
60     }
61   }
62 
63   // Write the contents of the MemInfoBlock based on the \p Schema provided to
64   // the raw_ostream \p OS.
65   void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
66     using namespace support;
67 
68     endian::Writer LE(OS, llvm::endianness::little);
69     for (const Meta Id : Schema) {
70       switch (Id) {
71 #define MIBEntryDef(NameTag, Name, Type)                                       \
72   case Meta::Name: {                                                           \
73     LE.write<Type>(Name);                                                      \
74   } break;
75 #include "llvm/ProfileData/MIBEntryDef.inc"
76 #undef MIBEntryDef
77       default:
78         llvm_unreachable("Unknown meta type id, invalid input?");
79       }
80     }
81   }
82 
83   // Print out the contents of the MemInfoBlock in YAML format.
84   void printYAML(raw_ostream &OS) const {
85     OS << "      MemInfoBlock:\n";
86 #define MIBEntryDef(NameTag, Name, Type)                                       \
87   OS << "        " << #Name << ": " << Name << "\n";
88 #include "llvm/ProfileData/MIBEntryDef.inc"
89 #undef MIBEntryDef
90   }
91 
92   // Define getters for each type which can be called by analyses.
93 #define MIBEntryDef(NameTag, Name, Type)                                       \
94   Type get##Name() const { return Name; }
95 #include "llvm/ProfileData/MIBEntryDef.inc"
96 #undef MIBEntryDef
97 
98   void clear() { *this = PortableMemInfoBlock(); }
99 
100   // Returns the full schema currently in use.
101   static MemProfSchema getSchema() {
102     MemProfSchema List;
103 #define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
104 #include "llvm/ProfileData/MIBEntryDef.inc"
105 #undef MIBEntryDef
106     return List;
107   }
108 
109   bool operator==(const PortableMemInfoBlock &Other) const {
110 #define MIBEntryDef(NameTag, Name, Type)                                       \
111   if (Other.get##Name() != get##Name())                                        \
112     return false;
113 #include "llvm/ProfileData/MIBEntryDef.inc"
114 #undef MIBEntryDef
115     return true;
116   }
117 
118   bool operator!=(const PortableMemInfoBlock &Other) const {
119     return !operator==(Other);
120   }
121 
122   static constexpr size_t serializedSize() {
123     size_t Result = 0;
124 #define MIBEntryDef(NameTag, Name, Type) Result += sizeof(Type);
125 #include "llvm/ProfileData/MIBEntryDef.inc"
126 #undef MIBEntryDef
127     return Result;
128   }
129 
130 private:
131 #define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
132 #include "llvm/ProfileData/MIBEntryDef.inc"
133 #undef MIBEntryDef
134 };
135 
136 // A type representing the id generated by hashing the contents of the Frame.
137 using FrameId = uint64_t;
138 // Describes a call frame for a dynamic allocation context. The contents of
139 // the frame are populated by symbolizing the stack depot call frame from the
140 // compiler runtime.
141 struct Frame {
142   // A uuid (uint64_t) identifying the function. It is obtained by
143   // llvm::md5(FunctionName) which returns the lower 64 bits.
144   GlobalValue::GUID Function;
145   // The symbol name for the function. Only populated in the Frame by the reader
146   // if requested during initialization. This field should not be serialized.
147   std::optional<std::string> SymbolName;
148   // The source line offset of the call from the beginning of parent function.
149   uint32_t LineOffset;
150   // The source column number of the call to help distinguish multiple calls
151   // on the same line.
152   uint32_t Column;
153   // Whether the current frame is inlined.
154   bool IsInlineFrame;
155 
156   Frame(const Frame &Other) {
157     Function = Other.Function;
158     SymbolName = Other.SymbolName;
159     LineOffset = Other.LineOffset;
160     Column = Other.Column;
161     IsInlineFrame = Other.IsInlineFrame;
162   }
163 
164   Frame(uint64_t Hash, uint32_t Off, uint32_t Col, bool Inline)
165       : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
166 
167   bool operator==(const Frame &Other) const {
168     // Ignore the SymbolName field to avoid a string compare. Comparing the
169     // function hash serves the same purpose.
170     return Other.Function == Function && Other.LineOffset == LineOffset &&
171            Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
172   }
173 
174   Frame &operator=(const Frame &Other) {
175     Function = Other.Function;
176     SymbolName = Other.SymbolName;
177     LineOffset = Other.LineOffset;
178     Column = Other.Column;
179     IsInlineFrame = Other.IsInlineFrame;
180     return *this;
181   }
182 
183   bool operator!=(const Frame &Other) const { return !operator==(Other); }
184 
185   // Write the contents of the frame to the ostream \p OS.
186   void serialize(raw_ostream &OS) const {
187     using namespace support;
188 
189     endian::Writer LE(OS, llvm::endianness::little);
190 
191     // If the type of the GlobalValue::GUID changes, then we need to update
192     // the reader and the writer.
193     static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
194                   "Expect GUID to be uint64_t.");
195     LE.write<uint64_t>(Function);
196 
197     LE.write<uint32_t>(LineOffset);
198     LE.write<uint32_t>(Column);
199     LE.write<bool>(IsInlineFrame);
200   }
201 
202   // Read a frame from char data which has been serialized as little endian.
203   static Frame deserialize(const unsigned char *Ptr) {
204     using namespace support;
205 
206     const uint64_t F =
207         endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
208     const uint32_t L =
209         endian::readNext<uint32_t, llvm::endianness::little, unaligned>(Ptr);
210     const uint32_t C =
211         endian::readNext<uint32_t, llvm::endianness::little, unaligned>(Ptr);
212     const bool I =
213         endian::readNext<bool, llvm::endianness::little, unaligned>(Ptr);
214     return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
215                  /*IsInlineFrame=*/I);
216   }
217 
218   // Returns the size of the frame information.
219   static constexpr size_t serializedSize() {
220     return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
221            sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
222   }
223 
224   // Print the frame information in YAML format.
225   void printYAML(raw_ostream &OS) const {
226     OS << "      -\n"
227        << "        Function: " << Function << "\n"
228        << "        SymbolName: " << SymbolName.value_or("<None>") << "\n"
229        << "        LineOffset: " << LineOffset << "\n"
230        << "        Column: " << Column << "\n"
231        << "        Inline: " << IsInlineFrame << "\n";
232   }
233 
234   // Return a hash value based on the contents of the frame. Here we don't use
235   // hashing from llvm ADT since we are going to persist the hash id, the hash
236   // combine algorithm in ADT uses a new randomized seed each time.
237   inline FrameId hash() const {
238     auto HashCombine = [](auto Value, size_t Seed) {
239       std::hash<decltype(Value)> Hasher;
240       // The constant used below is the 64 bit representation of the fractional
241       // part of the golden ratio. Used here for the randomness in their bit
242       // pattern.
243       return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
244     };
245 
246     size_t Result = 0;
247     Result ^= HashCombine(Function, Result);
248     Result ^= HashCombine(LineOffset, Result);
249     Result ^= HashCombine(Column, Result);
250     Result ^= HashCombine(IsInlineFrame, Result);
251     return static_cast<FrameId>(Result);
252   }
253 };
254 
255 // Holds allocation information in a space efficient format where frames are
256 // represented using unique identifiers.
257 struct IndexedAllocationInfo {
258   // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
259   // order. Frame contents are stored out-of-line.
260   llvm::SmallVector<FrameId> CallStack;
261   // The statistics obtained from the runtime for the allocation.
262   PortableMemInfoBlock Info;
263 
264   IndexedAllocationInfo() = default;
265   IndexedAllocationInfo(ArrayRef<FrameId> CS, const MemInfoBlock &MB)
266       : CallStack(CS.begin(), CS.end()), Info(MB) {}
267 
268   // Returns the size in bytes when this allocation info struct is serialized.
269   size_t serializedSize() const {
270     return sizeof(uint64_t) + // The number of frames to serialize.
271            sizeof(FrameId) * CallStack.size() +    // The callstack frame ids.
272            PortableMemInfoBlock::serializedSize(); // The size of the payload.
273   }
274 
275   bool operator==(const IndexedAllocationInfo &Other) const {
276     if (Other.Info != Info)
277       return false;
278 
279     if (Other.CallStack.size() != CallStack.size())
280       return false;
281 
282     for (size_t J = 0; J < Other.CallStack.size(); J++) {
283       if (Other.CallStack[J] != CallStack[J])
284         return false;
285     }
286     return true;
287   }
288 
289   bool operator!=(const IndexedAllocationInfo &Other) const {
290     return !operator==(Other);
291   }
292 };
293 
294 // Holds allocation information with frame contents inline. The type should
295 // be used for temporary in-memory instances.
296 struct AllocationInfo {
297   // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
298   llvm::SmallVector<Frame> CallStack;
299   // Same as IndexedAllocationInfo::Info;
300   PortableMemInfoBlock Info;
301 
302   AllocationInfo() = default;
303   AllocationInfo(
304       const IndexedAllocationInfo &IndexedAI,
305       llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
306     for (const FrameId &Id : IndexedAI.CallStack) {
307       CallStack.push_back(IdToFrameCallback(Id));
308     }
309     Info = IndexedAI.Info;
310   }
311 
312   void printYAML(raw_ostream &OS) const {
313     OS << "    -\n";
314     OS << "      Callstack:\n";
315     // TODO: Print out the frame on one line with to make it easier for deep
316     // callstacks once we have a test to check valid YAML is generated.
317     for (const Frame &F : CallStack) {
318       F.printYAML(OS);
319     }
320     Info.printYAML(OS);
321   }
322 };
323 
324 // Holds the memprof profile information for a function. The internal
325 // representation stores frame ids for efficiency. This representation should
326 // be used in the profile conversion and manipulation tools.
327 struct IndexedMemProfRecord {
328   // Memory allocation sites in this function for which we have memory
329   // profiling data.
330   llvm::SmallVector<IndexedAllocationInfo> AllocSites;
331   // Holds call sites in this function which are part of some memory
332   // allocation context. We store this as a list of locations, each with its
333   // list of inline locations in bottom-up order i.e. from leaf to root. The
334   // inline location list may include additional entries, users should pick
335   // the last entry in the list with the same function GUID.
336   llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
337 
338   void clear() {
339     AllocSites.clear();
340     CallSites.clear();
341   }
342 
343   void merge(const IndexedMemProfRecord &Other) {
344     // TODO: Filter out duplicates which may occur if multiple memprof
345     // profiles are merged together using llvm-profdata.
346     AllocSites.append(Other.AllocSites);
347     CallSites.append(Other.CallSites);
348   }
349 
350   size_t serializedSize() const {
351     size_t Result = sizeof(GlobalValue::GUID);
352     for (const IndexedAllocationInfo &N : AllocSites)
353       Result += N.serializedSize();
354 
355     // The number of callsites we have information for.
356     Result += sizeof(uint64_t);
357     for (const auto &Frames : CallSites) {
358       // The number of frame ids to serialize.
359       Result += sizeof(uint64_t);
360       Result += Frames.size() * sizeof(FrameId);
361     }
362     return Result;
363   }
364 
365   bool operator==(const IndexedMemProfRecord &Other) const {
366     if (Other.AllocSites.size() != AllocSites.size())
367       return false;
368 
369     if (Other.CallSites.size() != CallSites.size())
370       return false;
371 
372     for (size_t I = 0; I < AllocSites.size(); I++) {
373       if (AllocSites[I] != Other.AllocSites[I])
374         return false;
375     }
376 
377     for (size_t I = 0; I < CallSites.size(); I++) {
378       if (CallSites[I] != Other.CallSites[I])
379         return false;
380     }
381     return true;
382   }
383 
384   // Serializes the memprof records in \p Records to the ostream \p OS based
385   // on the schema provided in \p Schema.
386   void serialize(const MemProfSchema &Schema, raw_ostream &OS);
387 
388   // Deserializes memprof records from the Buffer.
389   static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
390                                           const unsigned char *Buffer);
391 
392   // Returns the GUID for the function name after canonicalization. For
393   // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
394   // mapped to functions using this GUID.
395   static GlobalValue::GUID getGUID(const StringRef FunctionName);
396 };
397 
398 // Holds the memprof profile information for a function. The internal
399 // representation stores frame contents inline. This representation should
400 // be used for small amount of temporary, in memory instances.
401 struct MemProfRecord {
402   // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
403   llvm::SmallVector<AllocationInfo> AllocSites;
404   // Same as IndexedMemProfRecord::CallSites with frame contents inline.
405   llvm::SmallVector<llvm::SmallVector<Frame>> CallSites;
406 
407   MemProfRecord() = default;
408   MemProfRecord(
409       const IndexedMemProfRecord &Record,
410       llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
411     for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
412       AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
413     }
414     for (const ArrayRef<FrameId> Site : Record.CallSites) {
415       llvm::SmallVector<Frame> Frames;
416       for (const FrameId Id : Site) {
417         Frames.push_back(IdToFrameCallback(Id));
418       }
419       CallSites.push_back(Frames);
420     }
421   }
422 
423   // Prints out the contents of the memprof record in YAML.
424   void print(llvm::raw_ostream &OS) const {
425     if (!AllocSites.empty()) {
426       OS << "    AllocSites:\n";
427       for (const AllocationInfo &N : AllocSites)
428         N.printYAML(OS);
429     }
430 
431     if (!CallSites.empty()) {
432       OS << "    CallSites:\n";
433       for (const llvm::SmallVector<Frame> &Frames : CallSites) {
434         for (const Frame &F : Frames) {
435           OS << "    -\n";
436           F.printYAML(OS);
437         }
438       }
439     }
440   }
441 };
442 
// Reads a memprof schema from \p Buffer. All entries in the buffer are
// interpreted as uint64_t. The first entry denotes the number of ids in the
// schema; the entries that follow are integers which map to memprof::Meta
// enum class entries. \p Buffer is taken by reference: after successfully
// reading the schema, the pointer is one byte past the schema contents.
// Returns the schema, or an error if reading fails.
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
449 
450 // Trait for reading IndexedMemProfRecord data from the on-disk hash table.
451 class RecordLookupTrait {
452 public:
453   using data_type = const IndexedMemProfRecord &;
454   using internal_key_type = uint64_t;
455   using external_key_type = uint64_t;
456   using hash_value_type = uint64_t;
457   using offset_type = uint64_t;
458 
459   RecordLookupTrait() = delete;
460   RecordLookupTrait(const MemProfSchema &S) : Schema(S) {}
461 
462   static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
463   static uint64_t GetInternalKey(uint64_t K) { return K; }
464   static uint64_t GetExternalKey(uint64_t K) { return K; }
465 
466   hash_value_type ComputeHash(uint64_t K) { return K; }
467 
468   static std::pair<offset_type, offset_type>
469   ReadKeyDataLength(const unsigned char *&D) {
470     using namespace support;
471 
472     offset_type KeyLen =
473         endian::readNext<offset_type, llvm::endianness::little, unaligned>(D);
474     offset_type DataLen =
475         endian::readNext<offset_type, llvm::endianness::little, unaligned>(D);
476     return std::make_pair(KeyLen, DataLen);
477   }
478 
479   uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
480     using namespace support;
481     return endian::readNext<external_key_type, llvm::endianness::little,
482                             unaligned>(D);
483   }
484 
485   data_type ReadData(uint64_t K, const unsigned char *D,
486                      offset_type /*Unused*/) {
487     Record = IndexedMemProfRecord::deserialize(Schema, D);
488     return Record;
489   }
490 
491 private:
492   // Holds the memprof schema used to deserialize records.
493   MemProfSchema Schema;
494   // Holds the records from one function deserialized from the indexed format.
495   IndexedMemProfRecord Record;
496 };
497 
498 // Trait for writing IndexedMemProfRecord data to the on-disk hash table.
499 class RecordWriterTrait {
500 public:
501   using key_type = uint64_t;
502   using key_type_ref = uint64_t;
503 
504   using data_type = IndexedMemProfRecord;
505   using data_type_ref = IndexedMemProfRecord &;
506 
507   using hash_value_type = uint64_t;
508   using offset_type = uint64_t;
509 
510   // Pointer to the memprof schema to use for the generator. Unlike the reader
511   // we must use a default constructor with no params for the writer trait so we
512   // have a public member which must be initialized by the user.
513   MemProfSchema *Schema = nullptr;
514 
515   RecordWriterTrait() = default;
516 
517   static hash_value_type ComputeHash(key_type_ref K) { return K; }
518 
519   static std::pair<offset_type, offset_type>
520   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
521     using namespace support;
522 
523     endian::Writer LE(Out, llvm::endianness::little);
524     offset_type N = sizeof(K);
525     LE.write<offset_type>(N);
526     offset_type M = V.serializedSize();
527     LE.write<offset_type>(M);
528     return std::make_pair(N, M);
529   }
530 
531   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
532     using namespace support;
533     endian::Writer LE(Out, llvm::endianness::little);
534     LE.write<uint64_t>(K);
535   }
536 
537   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
538                 offset_type /*Unused*/) {
539     assert(Schema != nullptr && "MemProf schema is not initialized!");
540     V.serialize(*Schema, Out);
541     // Clear the IndexedMemProfRecord which results in clearing/freeing its
542     // vectors of allocs and callsites. This is owned by the associated on-disk
543     // hash table, but unused after this point. See also the comment added to
544     // the client which constructs the on-disk hash table for this trait.
545     V.clear();
546   }
547 };
548 
549 // Trait for writing frame mappings to the on-disk hash table.
550 class FrameWriterTrait {
551 public:
552   using key_type = FrameId;
553   using key_type_ref = FrameId;
554 
555   using data_type = Frame;
556   using data_type_ref = Frame &;
557 
558   using hash_value_type = FrameId;
559   using offset_type = uint64_t;
560 
561   static hash_value_type ComputeHash(key_type_ref K) { return K; }
562 
563   static std::pair<offset_type, offset_type>
564   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
565     using namespace support;
566     endian::Writer LE(Out, llvm::endianness::little);
567     offset_type N = sizeof(K);
568     LE.write<offset_type>(N);
569     offset_type M = V.serializedSize();
570     LE.write<offset_type>(M);
571     return std::make_pair(N, M);
572   }
573 
574   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
575     using namespace support;
576     endian::Writer LE(Out, llvm::endianness::little);
577     LE.write<key_type>(K);
578   }
579 
580   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
581                 offset_type /*Unused*/) {
582     V.serialize(Out);
583   }
584 };
585 
586 // Trait for reading frame mappings from the on-disk hash table.
587 class FrameLookupTrait {
588 public:
589   using data_type = const Frame;
590   using internal_key_type = FrameId;
591   using external_key_type = FrameId;
592   using hash_value_type = FrameId;
593   using offset_type = uint64_t;
594 
595   static bool EqualKey(internal_key_type A, internal_key_type B) {
596     return A == B;
597   }
598   static uint64_t GetInternalKey(internal_key_type K) { return K; }
599   static uint64_t GetExternalKey(external_key_type K) { return K; }
600 
601   hash_value_type ComputeHash(internal_key_type K) { return K; }
602 
603   static std::pair<offset_type, offset_type>
604   ReadKeyDataLength(const unsigned char *&D) {
605     using namespace support;
606 
607     offset_type KeyLen =
608         endian::readNext<offset_type, llvm::endianness::little, unaligned>(D);
609     offset_type DataLen =
610         endian::readNext<offset_type, llvm::endianness::little, unaligned>(D);
611     return std::make_pair(KeyLen, DataLen);
612   }
613 
614   uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
615     using namespace support;
616     return endian::readNext<external_key_type, llvm::endianness::little,
617                             unaligned>(D);
618   }
619 
620   data_type ReadData(uint64_t K, const unsigned char *D,
621                      offset_type /*Unused*/) {
622     return Frame::deserialize(D);
623   }
624 };
625 } // namespace memprof
626 } // namespace llvm
627 
628 #endif // LLVM_PROFILEDATA_MEMPROF_H_
629