1 //===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ObjectYAML/MinidumpYAML.h"
10 #include "llvm/ObjectYAML/yaml2obj.h"
11 #include "llvm/Support/ConvertUTF.h"
12 #include "llvm/Support/raw_ostream.h"
13 
14 using namespace llvm;
15 using namespace llvm::minidump;
16 using namespace llvm::MinidumpYAML;
17 
18 namespace {
19 /// A helper class to manage the placement of various structures into the final
20 /// minidump binary. Space for objects can be allocated via various allocate***
21 /// methods, while the final minidump file is written by calling the writeTo
22 /// method. The plain versions of allocation functions take a reference to the
23 /// data which is to be written (and hence the data must be available until
24 /// writeTo is called), while the "New" versions allocate the data in an
25 /// allocator-managed buffer, which is available until the allocator object is
26 /// destroyed. For both kinds of functions, it is possible to modify the
27 /// data for which the space has been "allocated" until the final writeTo call.
28 /// This is useful for "linking" the allocated structures via their offsets.
29 class BlobAllocator {
30 public:
31   size_t tell() const { return NextOffset; }
32 
33   size_t allocateCallback(size_t Size,
34                           std::function<void(raw_ostream &)> Callback) {
35     size_t Offset = NextOffset;
36     NextOffset += Size;
37     Callbacks.push_back(std::move(Callback));
38     return Offset;
39   }
40 
41   size_t allocateBytes(ArrayRef<uint8_t> Data) {
42     return allocateCallback(
43         Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); });
44   }
45 
46   size_t allocateBytes(yaml::BinaryRef Data) {
47     return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) {
48       Data.writeAsBinary(OS);
49     });
50   }
51 
52   template <typename T> size_t allocateArray(ArrayRef<T> Data) {
53     return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()),
54                           sizeof(T) * Data.size()});
55   }
56 
57   template <typename T, typename RangeType>
58   std::pair<size_t, MutableArrayRef<T>>
59   allocateNewArray(const iterator_range<RangeType> &Range);
60 
61   template <typename T> size_t allocateObject(const T &Data) {
62     return allocateArray(makeArrayRef(Data));
63   }
64 
65   template <typename T, typename... Types>
66   std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
67     T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
68     return {allocateObject(*Object), Object};
69   }
70 
71   size_t allocateString(StringRef Str);
72 
73   void writeTo(raw_ostream &OS) const;
74 
75 private:
76   size_t NextOffset = 0;
77 
78   BumpPtrAllocator Temporaries;
79   std::vector<std::function<void(raw_ostream &)>> Callbacks;
80 };
81 } // namespace
82 
83 template <typename T, typename RangeType>
84 std::pair<size_t, MutableArrayRef<T>>
85 BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
86   size_t Num = std::distance(Range.begin(), Range.end());
87   MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
88   std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
89   return {allocateArray(Array), Array};
90 }
91 
92 size_t BlobAllocator::allocateString(StringRef Str) {
93   SmallVector<UTF16, 32> WStr;
94   bool OK = convertUTF8ToUTF16String(Str, WStr);
95   assert(OK && "Invalid UTF8 in Str?");
96   (void)OK;
97 
98   // The utf16 string is null-terminated, but the terminator is not counted in
99   // the string size.
100   WStr.push_back(0);
101   size_t Result =
102       allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first;
103   allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end()));
104   return Result;
105 }
106 
107 void BlobAllocator::writeTo(raw_ostream &OS) const {
108   size_t BeginOffset = OS.tell();
109   for (const auto &Callback : Callbacks)
110     Callback(OS);
111   assert(OS.tell() == BeginOffset + NextOffset &&
112          "Callbacks wrote an unexpected number of bytes.");
113   (void)BeginOffset;
114 }
115 
116 static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) {
117   return {support::ulittle32_t(Data.binary_size()),
118           support::ulittle32_t(File.allocateBytes(Data))};
119 }
120 
121 static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) {
122   File.allocateObject(S.MDExceptionStream);
123 
124   size_t DataEnd = File.tell();
125 
126   // Lay out the thread context data, (which is not a part of the stream).
127   // TODO: This usually (always?) matches the thread context of the
128   // corresponding thread, and may overlap memory regions as well.  We could
129   // add a level of indirection to the MinidumpYAML format (like an array of
130   // Blobs that the LocationDescriptors index into) to be able to distinguish
131   // the cases where location descriptions overlap vs happen to reference
132   // identical data.
133   S.MDExceptionStream.ThreadContext = layout(File, S.ThreadContext);
134 
135   return DataEnd;
136 }
137 
138 static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) {
139   Range.Entry.Memory = layout(File, Range.Content);
140 }
141 
142 static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) {
143   M.Entry.ModuleNameRVA = File.allocateString(M.Name);
144 
145   M.Entry.CvRecord = layout(File, M.CvRecord);
146   M.Entry.MiscRecord = layout(File, M.MiscRecord);
147 }
148 
149 static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) {
150   T.Entry.Stack.Memory = layout(File, T.Stack);
151   T.Entry.Context = layout(File, T.Context);
152 }
153 
154 template <typename EntryT>
155 static size_t layout(BlobAllocator &File,
156                      MinidumpYAML::detail::ListStream<EntryT> &S) {
157 
158   File.allocateNewObject<support::ulittle32_t>(S.Entries.size());
159   for (auto &E : S.Entries)
160     File.allocateObject(E.Entry);
161 
162   size_t DataEnd = File.tell();
163 
164   // Lay out the auxiliary data, (which is not a part of the stream).
165   DataEnd = File.tell();
166   for (auto &E : S.Entries)
167     layout(File, E);
168 
169   return DataEnd;
170 }
171 
172 static Directory layout(BlobAllocator &File, Stream &S) {
173   Directory Result;
174   Result.Type = S.Type;
175   Result.Location.RVA = File.tell();
176   Optional<size_t> DataEnd;
177   switch (S.Kind) {
178   case Stream::StreamKind::Exception:
179     DataEnd = layout(File, cast<MinidumpYAML::ExceptionStream>(S));
180     break;
181   case Stream::StreamKind::MemoryInfoList: {
182     MemoryInfoListStream &InfoList = cast<MemoryInfoListStream>(S);
183     File.allocateNewObject<minidump::MemoryInfoListHeader>(
184         sizeof(minidump::MemoryInfoListHeader), sizeof(minidump::MemoryInfo),
185         InfoList.Infos.size());
186     File.allocateArray(makeArrayRef(InfoList.Infos));
187     break;
188   }
189   case Stream::StreamKind::MemoryList:
190     DataEnd = layout(File, cast<MemoryListStream>(S));
191     break;
192   case Stream::StreamKind::ModuleList:
193     DataEnd = layout(File, cast<ModuleListStream>(S));
194     break;
195   case Stream::StreamKind::RawContent: {
196     RawContentStream &Raw = cast<RawContentStream>(S);
197     File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) {
198       Raw.Content.writeAsBinary(OS);
199       assert(Raw.Content.binary_size() <= Raw.Size);
200       OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0');
201     });
202     break;
203   }
204   case Stream::StreamKind::SystemInfo: {
205     SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
206     File.allocateObject(SystemInfo.Info);
207     // The CSD string is not a part of the stream.
208     DataEnd = File.tell();
209     SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
210     break;
211   }
212   case Stream::StreamKind::TextContent:
213     File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
214     break;
215   case Stream::StreamKind::ThreadList:
216     DataEnd = layout(File, cast<ThreadListStream>(S));
217     break;
218   }
219   // If DataEnd is not set, we assume everything we generated is a part of the
220   // stream.
221   Result.Location.DataSize =
222       DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
223   return Result;
224 }
225 
226 namespace llvm {
227 namespace yaml {
228 
229 bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out,
230                    ErrorHandler /*EH*/) {
231   BlobAllocator File;
232   File.allocateObject(Obj.Header);
233 
234   std::vector<Directory> StreamDirectory(Obj.Streams.size());
235   Obj.Header.StreamDirectoryRVA =
236       File.allocateArray(makeArrayRef(StreamDirectory));
237   Obj.Header.NumberOfStreams = StreamDirectory.size();
238 
239   for (auto &Stream : enumerate(Obj.Streams))
240     StreamDirectory[Stream.index()] = layout(File, *Stream.value());
241 
242   File.writeTo(Out);
243   return true;
244 }
245 
246 } // namespace yaml
247 } // namespace llvm
248