1 //===-- BreakpadRecords.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
/// Keywords recognised while tokenizing a Breakpad record line. The mapping
/// from text to enumerator lives in stringTo<Token>.
enum class Token {
  Unknown, ///< Any text that is not one of the keywords below.
  Module,  ///< "MODULE"
  Info,    ///< "INFO"
  CodeID,  ///< "CODE_ID"
  File,    ///< "FILE"
  Func,    ///< "FUNC"
  Public,  ///< "PUBLIC"
  Stack,   ///< "STACK"
  CFI,     ///< "CFI"
  Init,    ///< "INIT"
  Win,     ///< "WIN"
};
} // namespace
33 
34 template<typename T>
35 static T stringTo(llvm::StringRef Str);
36 
stringTo(llvm::StringRef Str)37 template <> Token stringTo<Token>(llvm::StringRef Str) {
38   return llvm::StringSwitch<Token>(Str)
39       .Case("MODULE", Token::Module)
40       .Case("INFO", Token::Info)
41       .Case("CODE_ID", Token::CodeID)
42       .Case("FILE", Token::File)
43       .Case("FUNC", Token::Func)
44       .Case("PUBLIC", Token::Public)
45       .Case("STACK", Token::Stack)
46       .Case("CFI", Token::CFI)
47       .Case("INIT", Token::Init)
48       .Case("WIN", Token::Win)
49       .Default(Token::Unknown);
50 }
51 
52 template <>
stringTo(llvm::StringRef Str)53 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
54   using llvm::Triple;
55   return llvm::StringSwitch<Triple::OSType>(Str)
56       .Case("Linux", Triple::Linux)
57       .Case("mac", Triple::MacOSX)
58       .Case("windows", Triple::Win32)
59       .Default(Triple::UnknownOS);
60 }
61 
62 template <>
stringTo(llvm::StringRef Str)63 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
64   using llvm::Triple;
65   return llvm::StringSwitch<Triple::ArchType>(Str)
66       .Case("arm", Triple::arm)
67       .Cases("arm64", "arm64e", Triple::aarch64)
68       .Case("mips", Triple::mips)
69       .Case("ppc", Triple::ppc)
70       .Case("ppc64", Triple::ppc64)
71       .Case("s390", Triple::systemz)
72       .Case("sparc", Triple::sparc)
73       .Case("sparcv9", Triple::sparcv9)
74       .Case("x86", Triple::x86)
75       .Cases("x86_64", "x86_64h", Triple::x86_64)
76       .Default(Triple::UnknownArch);
77 }
78 
79 template<typename T>
consume(llvm::StringRef & Str)80 static T consume(llvm::StringRef &Str) {
81   llvm::StringRef Token;
82   std::tie(Token, Str) = getToken(Str);
83   return stringTo<T>(Token);
84 }
85 
/// Number of hexadecimal digits in the textual encoding of a (POD) object of
/// type \p T: every byte of the object takes two digits.
template <typename T> static constexpr size_t hex_digits() {
  return sizeof(T) * 2;
}
91 
parseModuleId(llvm::Triple::OSType os,llvm::StringRef str)92 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
93   struct data_t {
94     using uuid_t = uint8_t[16];
95     uuid_t uuid;
96     llvm::support::ubig32_t age;
97   } data;
98   static_assert(sizeof(data) == 20, "");
99   // The textual module id encoding should be between 33 and 40 bytes long,
100   // depending on the size of the age field, which is of variable length.
101   // The first three chunks of the id are encoded in big endian, so we need to
102   // byte-swap those.
103   if (str.size() <= hex_digits<data_t::uuid_t>() ||
104       str.size() > hex_digits<data_t>())
105     return UUID();
106   if (!all_of(str, llvm::isHexDigit))
107     return UUID();
108 
109   llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
110   llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
111 
112   llvm::copy(fromHex(uuid_str), data.uuid);
113   uint32_t age;
114   bool success = to_integer(age_str, age, 16);
115   assert(success);
116   (void)success;
117   data.age = age;
118 
119   // On non-windows, the age field should always be zero, so we don't include to
120   // match the native uuid format of these platforms.
121   return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data)
122                                                          : sizeof(data.uuid));
123 }
124 
classify(llvm::StringRef Line)125 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
126   Token Tok = consume<Token>(Line);
127   switch (Tok) {
128   case Token::Module:
129     return Record::Module;
130   case Token::Info:
131     return Record::Info;
132   case Token::File:
133     return Record::File;
134   case Token::Func:
135     return Record::Func;
136   case Token::Public:
137     return Record::Public;
138   case Token::Stack:
139     Tok = consume<Token>(Line);
140     switch (Tok) {
141     case Token::CFI:
142       return Record::StackCFI;
143     case Token::Win:
144       return Record::StackWin;
145     default:
146       return llvm::None;
147     }
148 
149   case Token::Unknown:
150     // Optimistically assume that any unrecognised token means this is a line
151     // record, those don't have a special keyword and start directly with a
152     // hex number.
153     return Record::Line;
154 
155   case Token::CodeID:
156   case Token::CFI:
157   case Token::Init:
158   case Token::Win:
159     // These should never appear at the start of a valid record.
160     return llvm::None;
161   }
162   llvm_unreachable("Fully covered switch above!");
163 }
164 
parse(llvm::StringRef Line)165 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
166   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
167   if (consume<Token>(Line) != Token::Module)
168     return llvm::None;
169 
170   llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
171   if (OS == llvm::Triple::UnknownOS)
172     return llvm::None;
173 
174   llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
175   if (Arch == llvm::Triple::UnknownArch)
176     return llvm::None;
177 
178   llvm::StringRef Str;
179   std::tie(Str, Line) = getToken(Line);
180   UUID ID = parseModuleId(OS, Str);
181   if (!ID)
182     return llvm::None;
183 
184   return ModuleRecord(OS, Arch, std::move(ID));
185 }
186 
operator <<(llvm::raw_ostream & OS,const ModuleRecord & R)187 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
188                                         const ModuleRecord &R) {
189   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
190             << llvm::Triple::getArchTypeName(R.Arch) << " "
191             << R.ID.GetAsString();
192 }
193 
parse(llvm::StringRef Line)194 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
195   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
196   if (consume<Token>(Line) != Token::Info)
197     return llvm::None;
198 
199   if (consume<Token>(Line) != Token::CodeID)
200     return llvm::None;
201 
202   llvm::StringRef Str;
203   std::tie(Str, Line) = getToken(Line);
204   // If we don't have any text following the code ID (e.g. on linux), we should
205   // use this as the UUID. Otherwise, we should revert back to the module ID.
206   UUID ID;
207   if (Line.trim().empty()) {
208     if (Str.empty() || !ID.SetFromStringRef(Str))
209       return llvm::None;
210   }
211   return InfoRecord(std::move(ID));
212 }
213 
operator <<(llvm::raw_ostream & OS,const InfoRecord & R)214 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
215                                         const InfoRecord &R) {
216   return OS << "INFO CODE_ID " << R.ID.GetAsString();
217 }
218 
parse(llvm::StringRef Line)219 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
220   // FILE number name
221   if (consume<Token>(Line) != Token::File)
222     return llvm::None;
223 
224   llvm::StringRef Str;
225   size_t Number;
226   std::tie(Str, Line) = getToken(Line);
227   if (!to_integer(Str, Number))
228     return llvm::None;
229 
230   llvm::StringRef Name = Line.trim();
231   if (Name.empty())
232     return llvm::None;
233 
234   return FileRecord(Number, Name);
235 }
236 
operator <<(llvm::raw_ostream & OS,const FileRecord & R)237 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
238                                         const FileRecord &R) {
239   return OS << "FILE " << R.Number << " " << R.Name;
240 }
241 
parsePublicOrFunc(llvm::StringRef Line,bool & Multiple,lldb::addr_t & Address,lldb::addr_t * Size,lldb::addr_t & ParamSize,llvm::StringRef & Name)242 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
243                               lldb::addr_t &Address, lldb::addr_t *Size,
244                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
245   // PUBLIC [m] address param_size name
246   // or
247   // FUNC [m] address size param_size name
248 
249   Token Tok = Size ? Token::Func : Token::Public;
250 
251   if (consume<Token>(Line) != Tok)
252     return false;
253 
254   llvm::StringRef Str;
255   std::tie(Str, Line) = getToken(Line);
256   Multiple = Str == "m";
257 
258   if (Multiple)
259     std::tie(Str, Line) = getToken(Line);
260   if (!to_integer(Str, Address, 16))
261     return false;
262 
263   if (Tok == Token::Func) {
264     std::tie(Str, Line) = getToken(Line);
265     if (!to_integer(Str, *Size, 16))
266       return false;
267   }
268 
269   std::tie(Str, Line) = getToken(Line);
270   if (!to_integer(Str, ParamSize, 16))
271     return false;
272 
273   Name = Line.trim();
274   if (Name.empty())
275     return false;
276 
277   return true;
278 }
279 
parse(llvm::StringRef Line)280 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
281   bool Multiple;
282   lldb::addr_t Address, Size, ParamSize;
283   llvm::StringRef Name;
284 
285   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
286     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
287 
288   return llvm::None;
289 }
290 
operator ==(const FuncRecord & L,const FuncRecord & R)291 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
292   return L.Multiple == R.Multiple && L.Address == R.Address &&
293          L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
294 }
operator <<(llvm::raw_ostream & OS,const FuncRecord & R)295 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
296                                         const FuncRecord &R) {
297   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
298                              R.Multiple ? "m " : "", R.Address, R.Size,
299                              R.ParamSize, R.Name);
300 }
301 
parse(llvm::StringRef Line)302 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
303   lldb::addr_t Address;
304   llvm::StringRef Str;
305   std::tie(Str, Line) = getToken(Line);
306   if (!to_integer(Str, Address, 16))
307     return llvm::None;
308 
309   lldb::addr_t Size;
310   std::tie(Str, Line) = getToken(Line);
311   if (!to_integer(Str, Size, 16))
312     return llvm::None;
313 
314   uint32_t LineNum;
315   std::tie(Str, Line) = getToken(Line);
316   if (!to_integer(Str, LineNum))
317     return llvm::None;
318 
319   size_t FileNum;
320   std::tie(Str, Line) = getToken(Line);
321   if (!to_integer(Str, FileNum))
322     return llvm::None;
323 
324   return LineRecord(Address, Size, LineNum, FileNum);
325 }
326 
operator ==(const LineRecord & L,const LineRecord & R)327 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
328   return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
329          L.FileNum == R.FileNum;
330 }
operator <<(llvm::raw_ostream & OS,const LineRecord & R)331 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
332                                         const LineRecord &R) {
333   return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
334                              R.LineNum, R.FileNum);
335 }
336 
parse(llvm::StringRef Line)337 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
338   bool Multiple;
339   lldb::addr_t Address, ParamSize;
340   llvm::StringRef Name;
341 
342   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
343     return PublicRecord(Multiple, Address, ParamSize, Name);
344 
345   return llvm::None;
346 }
347 
operator ==(const PublicRecord & L,const PublicRecord & R)348 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
349   return L.Multiple == R.Multiple && L.Address == R.Address &&
350          L.ParamSize == R.ParamSize && L.Name == R.Name;
351 }
operator <<(llvm::raw_ostream & OS,const PublicRecord & R)352 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
353                                         const PublicRecord &R) {
354   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
355                              R.Multiple ? "m " : "", R.Address, R.ParamSize,
356                              R.Name);
357 }
358 
parse(llvm::StringRef Line)359 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
360   // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
361   // or
362   // STACK CFI address reg1: expr1 reg2: expr2 ...
363   // No token in exprN ends with a colon.
364 
365   if (consume<Token>(Line) != Token::Stack)
366     return llvm::None;
367   if (consume<Token>(Line) != Token::CFI)
368     return llvm::None;
369 
370   llvm::StringRef Str;
371   std::tie(Str, Line) = getToken(Line);
372 
373   bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
374   if (IsInitRecord)
375     std::tie(Str, Line) = getToken(Line);
376 
377   lldb::addr_t Address;
378   if (!to_integer(Str, Address, 16))
379     return llvm::None;
380 
381   llvm::Optional<lldb::addr_t> Size;
382   if (IsInitRecord) {
383     Size.emplace();
384     std::tie(Str, Line) = getToken(Line);
385     if (!to_integer(Str, *Size, 16))
386       return llvm::None;
387   }
388 
389   return StackCFIRecord(Address, Size, Line.trim());
390 }
391 
operator ==(const StackCFIRecord & L,const StackCFIRecord & R)392 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
393   return L.Address == R.Address && L.Size == R.Size &&
394          L.UnwindRules == R.UnwindRules;
395 }
396 
operator <<(llvm::raw_ostream & OS,const StackCFIRecord & R)397 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
398                                         const StackCFIRecord &R) {
399   OS << "STACK CFI ";
400   if (R.Size)
401     OS << "INIT ";
402   OS << llvm::formatv("{0:x-} ", R.Address);
403   if (R.Size)
404     OS << llvm::formatv("{0:x-} ", *R.Size);
405   return OS << " " << R.UnwindRules;
406 }
407 
parse(llvm::StringRef Line)408 llvm::Optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
409   // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
410   //     saved_register_size local_size max_stack_size has_program_string
411   //     program_string_OR_allocates_base_pointer
412 
413   if (consume<Token>(Line) != Token::Stack)
414     return llvm::None;
415   if (consume<Token>(Line) != Token::Win)
416     return llvm::None;
417 
418   llvm::StringRef Str;
419   uint8_t Type;
420   std::tie(Str, Line) = getToken(Line);
421   // Right now we only support the "FrameData" frame type.
422   if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData)
423     return llvm::None;
424 
425   lldb::addr_t RVA;
426   std::tie(Str, Line) = getToken(Line);
427   if (!to_integer(Str, RVA, 16))
428     return llvm::None;
429 
430   lldb::addr_t CodeSize;
431   std::tie(Str, Line) = getToken(Line);
432   if (!to_integer(Str, CodeSize, 16))
433     return llvm::None;
434 
435   // Skip fields which we aren't using right now.
436   std::tie(Str, Line) = getToken(Line); // prologue_size
437   std::tie(Str, Line) = getToken(Line); // epilogue_size
438 
439   lldb::addr_t ParameterSize;
440   std::tie(Str, Line) = getToken(Line);
441   if (!to_integer(Str, ParameterSize, 16))
442     return llvm::None;
443 
444   lldb::addr_t SavedRegisterSize;
445   std::tie(Str, Line) = getToken(Line);
446   if (!to_integer(Str, SavedRegisterSize, 16))
447     return llvm::None;
448 
449   lldb::addr_t LocalSize;
450   std::tie(Str, Line) = getToken(Line);
451   if (!to_integer(Str, LocalSize, 16))
452     return llvm::None;
453 
454   std::tie(Str, Line) = getToken(Line); // max_stack_size
455 
456   uint8_t HasProgramString;
457   std::tie(Str, Line) = getToken(Line);
458   if (!to_integer(Str, HasProgramString))
459     return llvm::None;
460   // FrameData records should always have a program string.
461   if (!HasProgramString)
462     return llvm::None;
463 
464   return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize,
465                         LocalSize, Line.trim());
466 }
467 
operator ==(const StackWinRecord & L,const StackWinRecord & R)468 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) {
469   return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
470          L.ParameterSize == R.ParameterSize &&
471          L.SavedRegisterSize == R.SavedRegisterSize &&
472          L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString;
473 }
474 
operator <<(llvm::raw_ostream & OS,const StackWinRecord & R)475 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
476                                         const StackWinRecord &R) {
477   return OS << llvm::formatv(
478              "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA,
479              R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize,
480              R.ProgramString);
481 }
482 
toString(Record::Kind K)483 llvm::StringRef breakpad::toString(Record::Kind K) {
484   switch (K) {
485   case Record::Module:
486     return "MODULE";
487   case Record::Info:
488     return "INFO";
489   case Record::File:
490     return "FILE";
491   case Record::Func:
492     return "FUNC";
493   case Record::Line:
494     return "LINE";
495   case Record::Public:
496     return "PUBLIC";
497   case Record::StackCFI:
498     return "STACK CFI";
499   case Record::StackWin:
500     return "STACK WIN";
501   }
502   llvm_unreachable("Unknown record kind!");
503 }
504