1 //===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ObjectYAML/MinidumpYAML.h"
10 #include "llvm/ObjectYAML/yaml2obj.h"
11 #include "llvm/Support/ConvertUTF.h"
12 #include "llvm/Support/raw_ostream.h"
13 #include <optional>
14 
15 using namespace llvm;
16 using namespace llvm::minidump;
17 using namespace llvm::MinidumpYAML;
18 
19 namespace {
20 /// A helper class to manage the placement of various structures into the final
21 /// minidump binary. Space for objects can be allocated via various allocate***
22 /// methods, while the final minidump file is written by calling the writeTo
23 /// method. The plain versions of allocation functions take a reference to the
24 /// data which is to be written (and hence the data must be available until
25 /// writeTo is called), while the "New" versions allocate the data in an
26 /// allocator-managed buffer, which is available until the allocator object is
27 /// destroyed. For both kinds of functions, it is possible to modify the
28 /// data for which the space has been "allocated" until the final writeTo call.
29 /// This is useful for "linking" the allocated structures via their offsets.
30 class BlobAllocator {
31 public:
32   size_t tell() const { return NextOffset; }
33 
34   size_t allocateCallback(size_t Size,
35                           std::function<void(raw_ostream &)> Callback) {
36     size_t Offset = NextOffset;
37     NextOffset += Size;
38     Callbacks.push_back(std::move(Callback));
39     return Offset;
40   }
41 
42   size_t allocateBytes(ArrayRef<uint8_t> Data) {
43     return allocateCallback(
44         Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); });
45   }
46 
47   size_t allocateBytes(yaml::BinaryRef Data) {
48     return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) {
49       Data.writeAsBinary(OS);
50     });
51   }
52 
53   template <typename T> size_t allocateArray(ArrayRef<T> Data) {
54     return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()),
55                           sizeof(T) * Data.size()});
56   }
57 
58   template <typename T, typename RangeType>
59   std::pair<size_t, MutableArrayRef<T>>
60   allocateNewArray(const iterator_range<RangeType> &Range);
61 
62   template <typename T> size_t allocateObject(const T &Data) {
63     return allocateArray(ArrayRef(Data));
64   }
65 
66   template <typename T, typename... Types>
67   std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
68     T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
69     return {allocateObject(*Object), Object};
70   }
71 
72   size_t allocateString(StringRef Str);
73 
74   void writeTo(raw_ostream &OS) const;
75 
76 private:
77   size_t NextOffset = 0;
78 
79   BumpPtrAllocator Temporaries;
80   std::vector<std::function<void(raw_ostream &)>> Callbacks;
81 };
82 } // namespace
83 
84 template <typename T, typename RangeType>
85 std::pair<size_t, MutableArrayRef<T>>
86 BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
87   size_t Num = std::distance(Range.begin(), Range.end());
88   MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
89   std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
90   return {allocateArray(Array), Array};
91 }
92 
93 size_t BlobAllocator::allocateString(StringRef Str) {
94   SmallVector<UTF16, 32> WStr;
95   bool OK = convertUTF8ToUTF16String(Str, WStr);
96   assert(OK && "Invalid UTF8 in Str?");
97   (void)OK;
98 
99   // The utf16 string is null-terminated, but the terminator is not counted in
100   // the string size.
101   WStr.push_back(0);
102   size_t Result =
103       allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first;
104   allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end()));
105   return Result;
106 }
107 
108 void BlobAllocator::writeTo(raw_ostream &OS) const {
109   size_t BeginOffset = OS.tell();
110   for (const auto &Callback : Callbacks)
111     Callback(OS);
112   assert(OS.tell() == BeginOffset + NextOffset &&
113          "Callbacks wrote an unexpected number of bytes.");
114   (void)BeginOffset;
115 }
116 
117 static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) {
118   return {support::ulittle32_t(Data.binary_size()),
119           support::ulittle32_t(File.allocateBytes(Data))};
120 }
121 
122 static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) {
123   File.allocateObject(S.MDExceptionStream);
124 
125   size_t DataEnd = File.tell();
126 
127   // Lay out the thread context data, (which is not a part of the stream).
128   // TODO: This usually (always?) matches the thread context of the
129   // corresponding thread, and may overlap memory regions as well.  We could
130   // add a level of indirection to the MinidumpYAML format (like an array of
131   // Blobs that the LocationDescriptors index into) to be able to distinguish
132   // the cases where location descriptions overlap vs happen to reference
133   // identical data.
134   S.MDExceptionStream.ThreadContext = layout(File, S.ThreadContext);
135 
136   return DataEnd;
137 }
138 
139 static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) {
140   Range.Entry.Memory = layout(File, Range.Content);
141 }
142 
143 static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) {
144   M.Entry.ModuleNameRVA = File.allocateString(M.Name);
145 
146   M.Entry.CvRecord = layout(File, M.CvRecord);
147   M.Entry.MiscRecord = layout(File, M.MiscRecord);
148 }
149 
150 static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) {
151   T.Entry.Stack.Memory = layout(File, T.Stack);
152   T.Entry.Context = layout(File, T.Context);
153 }
154 
155 template <typename EntryT>
156 static size_t layout(BlobAllocator &File,
157                      MinidumpYAML::detail::ListStream<EntryT> &S) {
158 
159   File.allocateNewObject<support::ulittle32_t>(S.Entries.size());
160   for (auto &E : S.Entries)
161     File.allocateObject(E.Entry);
162 
163   size_t DataEnd = File.tell();
164 
165   // Lay out the auxiliary data, (which is not a part of the stream).
166   DataEnd = File.tell();
167   for (auto &E : S.Entries)
168     layout(File, E);
169 
170   return DataEnd;
171 }
172 
173 static Directory layout(BlobAllocator &File, Stream &S) {
174   Directory Result;
175   Result.Type = S.Type;
176   Result.Location.RVA = File.tell();
177   std::optional<size_t> DataEnd;
178   switch (S.Kind) {
179   case Stream::StreamKind::Exception:
180     DataEnd = layout(File, cast<MinidumpYAML::ExceptionStream>(S));
181     break;
182   case Stream::StreamKind::MemoryInfoList: {
183     MemoryInfoListStream &InfoList = cast<MemoryInfoListStream>(S);
184     File.allocateNewObject<minidump::MemoryInfoListHeader>(
185         sizeof(minidump::MemoryInfoListHeader), sizeof(minidump::MemoryInfo),
186         InfoList.Infos.size());
187     File.allocateArray(ArrayRef(InfoList.Infos));
188     break;
189   }
190   case Stream::StreamKind::MemoryList:
191     DataEnd = layout(File, cast<MemoryListStream>(S));
192     break;
193   case Stream::StreamKind::ModuleList:
194     DataEnd = layout(File, cast<ModuleListStream>(S));
195     break;
196   case Stream::StreamKind::RawContent: {
197     RawContentStream &Raw = cast<RawContentStream>(S);
198     File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) {
199       Raw.Content.writeAsBinary(OS);
200       assert(Raw.Content.binary_size() <= Raw.Size);
201       OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0');
202     });
203     break;
204   }
205   case Stream::StreamKind::SystemInfo: {
206     SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
207     File.allocateObject(SystemInfo.Info);
208     // The CSD string is not a part of the stream.
209     DataEnd = File.tell();
210     SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
211     break;
212   }
213   case Stream::StreamKind::TextContent:
214     File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
215     break;
216   case Stream::StreamKind::ThreadList:
217     DataEnd = layout(File, cast<ThreadListStream>(S));
218     break;
219   }
220   // If DataEnd is not set, we assume everything we generated is a part of the
221   // stream.
222   Result.Location.DataSize =
223       DataEnd.value_or(File.tell()) - Result.Location.RVA;
224   return Result;
225 }
226 
227 namespace llvm {
228 namespace yaml {
229 
230 bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out,
231                    ErrorHandler /*EH*/) {
232   BlobAllocator File;
233   File.allocateObject(Obj.Header);
234 
235   std::vector<Directory> StreamDirectory(Obj.Streams.size());
236   Obj.Header.StreamDirectoryRVA = File.allocateArray(ArrayRef(StreamDirectory));
237   Obj.Header.NumberOfStreams = StreamDirectory.size();
238 
239   for (const auto &[Index, Stream] : enumerate(Obj.Streams))
240     StreamDirectory[Index] = layout(File, *Stream);
241 
242   File.writeTo(Out);
243   return true;
244 }
245 
246 } // namespace yaml
247 } // namespace llvm
248