1 //===-- BreakpadRecords.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 #include <optional>
15 
16 using namespace lldb_private;
17 using namespace lldb_private::breakpad;
18 
namespace {
/// Keywords recognised at the start of (or inside) a breakpad record.
/// Unknown is what stringTo<Token> produces for any text that is not one of
/// the recognised keywords.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Inline,
  InlineOrigin,
  Public,
  Stack,
  CFI,
  Init,
  Win,
};
} // namespace
36 
/// Convert a single textual token into a value of type T. This is only a
/// declaration; the behaviour for each supported T is supplied by the explicit
/// specializations that follow.
template<typename T>
static T stringTo(llvm::StringRef Str);
39 
40 template <> Token stringTo<Token>(llvm::StringRef Str) {
41   return llvm::StringSwitch<Token>(Str)
42       .Case("MODULE", Token::Module)
43       .Case("INFO", Token::Info)
44       .Case("CODE_ID", Token::CodeID)
45       .Case("FILE", Token::File)
46       .Case("FUNC", Token::Func)
47       .Case("INLINE", Token::Inline)
48       .Case("INLINE_ORIGIN", Token::InlineOrigin)
49       .Case("PUBLIC", Token::Public)
50       .Case("STACK", Token::Stack)
51       .Case("CFI", Token::CFI)
52       .Case("INIT", Token::Init)
53       .Case("WIN", Token::Win)
54       .Default(Token::Unknown);
55 }
56 
57 template <>
58 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
59   using llvm::Triple;
60   return llvm::StringSwitch<Triple::OSType>(Str)
61       .Case("Linux", Triple::Linux)
62       .Case("mac", Triple::MacOSX)
63       .Case("windows", Triple::Win32)
64       .Default(Triple::UnknownOS);
65 }
66 
67 template <>
68 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
69   using llvm::Triple;
70   return llvm::StringSwitch<Triple::ArchType>(Str)
71       .Case("arm", Triple::arm)
72       .Cases("arm64", "arm64e", Triple::aarch64)
73       .Case("mips", Triple::mips)
74       .Case("msp430", Triple::msp430)
75       .Case("ppc", Triple::ppc)
76       .Case("ppc64", Triple::ppc64)
77       .Case("s390", Triple::systemz)
78       .Case("sparc", Triple::sparc)
79       .Case("sparcv9", Triple::sparcv9)
80       .Case("x86", Triple::x86)
81       .Cases("x86_64", "x86_64h", Triple::x86_64)
82       .Default(Triple::UnknownArch);
83 }
84 
85 template<typename T>
86 static T consume(llvm::StringRef &Str) {
87   llvm::StringRef Token;
88   std::tie(Token, Str) = getToken(Str);
89   return stringTo<T>(Token);
90 }
91 
/// Return the number of hex digits needed to encode a (POD) object of a given
/// type: every byte of the object takes two digits.
template <typename T> static constexpr size_t hex_digits() {
  constexpr size_t digits_per_byte = 2;
  return sizeof(T) * digits_per_byte;
}
97 
98 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
99   struct data_t {
100     using uuid_t = uint8_t[16];
101     uuid_t uuid;
102     llvm::support::ubig32_t age;
103   } data;
104   static_assert(sizeof(data) == 20);
105   // The textual module id encoding should be between 33 and 40 bytes long,
106   // depending on the size of the age field, which is of variable length.
107   // The first three chunks of the id are encoded in big endian, so we need to
108   // byte-swap those.
109   if (str.size() <= hex_digits<data_t::uuid_t>() ||
110       str.size() > hex_digits<data_t>())
111     return UUID();
112   if (!all_of(str, llvm::isHexDigit))
113     return UUID();
114 
115   llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
116   llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
117 
118   llvm::copy(fromHex(uuid_str), data.uuid);
119   uint32_t age;
120   bool success = to_integer(age_str, age, 16);
121   assert(success);
122   (void)success;
123   data.age = age;
124 
125   // On non-windows, the age field should always be zero, so we don't include to
126   // match the native uuid format of these platforms.
127   return UUID(&data, os == llvm::Triple::Win32 ? sizeof(data)
128                                                : sizeof(data.uuid));
129 }
130 
131 std::optional<Record::Kind> Record::classify(llvm::StringRef Line) {
132   Token Tok = consume<Token>(Line);
133   switch (Tok) {
134   case Token::Module:
135     return Record::Module;
136   case Token::Info:
137     return Record::Info;
138   case Token::File:
139     return Record::File;
140   case Token::Func:
141     return Record::Func;
142   case Token::Public:
143     return Record::Public;
144   case Token::Stack:
145     Tok = consume<Token>(Line);
146     switch (Tok) {
147     case Token::CFI:
148       return Record::StackCFI;
149     case Token::Win:
150       return Record::StackWin;
151     default:
152       return std::nullopt;
153     }
154   case Token::Inline:
155     return Record::Inline;
156   case Token::InlineOrigin:
157     return Record::InlineOrigin;
158   case Token::Unknown:
159     // Optimistically assume that any unrecognised token means this is a line
160     // record, those don't have a special keyword and start directly with a
161     // hex number.
162     return Record::Line;
163 
164   case Token::CodeID:
165   case Token::CFI:
166   case Token::Init:
167   case Token::Win:
168     // These should never appear at the start of a valid record.
169     return std::nullopt;
170   }
171   llvm_unreachable("Fully covered switch above!");
172 }
173 
174 std::optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
175   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
176   if (consume<Token>(Line) != Token::Module)
177     return std::nullopt;
178 
179   llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
180   if (OS == llvm::Triple::UnknownOS)
181     return std::nullopt;
182 
183   llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
184   if (Arch == llvm::Triple::UnknownArch)
185     return std::nullopt;
186 
187   llvm::StringRef Str;
188   std::tie(Str, Line) = getToken(Line);
189   UUID ID = parseModuleId(OS, Str);
190   if (!ID)
191     return std::nullopt;
192 
193   return ModuleRecord(OS, Arch, std::move(ID));
194 }
195 
196 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
197                                         const ModuleRecord &R) {
198   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
199             << llvm::Triple::getArchTypeName(R.Arch) << " "
200             << R.ID.GetAsString();
201 }
202 
203 std::optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
204   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
205   if (consume<Token>(Line) != Token::Info)
206     return std::nullopt;
207 
208   if (consume<Token>(Line) != Token::CodeID)
209     return std::nullopt;
210 
211   llvm::StringRef Str;
212   std::tie(Str, Line) = getToken(Line);
213   // If we don't have any text following the code ID (e.g. on linux), we should
214   // use this as the UUID. Otherwise, we should revert back to the module ID.
215   UUID ID;
216   if (Line.trim().empty()) {
217     if (Str.empty() || !ID.SetFromStringRef(Str))
218       return std::nullopt;
219   }
220   return InfoRecord(std::move(ID));
221 }
222 
223 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
224                                         const InfoRecord &R) {
225   return OS << "INFO CODE_ID " << R.ID.GetAsString();
226 }
227 
228 template <typename T>
229 static std::optional<T> parseNumberName(llvm::StringRef Line, Token TokenType) {
230   // TOKEN number name
231   if (consume<Token>(Line) != TokenType)
232     return std::nullopt;
233 
234   llvm::StringRef Str;
235   size_t Number;
236   std::tie(Str, Line) = getToken(Line);
237   if (!to_integer(Str, Number))
238     return std::nullopt;
239 
240   llvm::StringRef Name = Line.trim();
241   if (Name.empty())
242     return std::nullopt;
243 
244   return T(Number, Name);
245 }
246 
247 std::optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
248   // FILE number name
249   return parseNumberName<FileRecord>(Line, Token::File);
250 }
251 
252 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
253                                         const FileRecord &R) {
254   return OS << "FILE " << R.Number << " " << R.Name;
255 }
256 
257 std::optional<InlineOriginRecord>
258 InlineOriginRecord::parse(llvm::StringRef Line) {
259   // INLINE_ORIGIN number name
260   return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin);
261 }
262 
263 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
264                                         const InlineOriginRecord &R) {
265   return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name;
266 }
267 
268 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
269                               lldb::addr_t &Address, lldb::addr_t *Size,
270                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
271   // PUBLIC [m] address param_size name
272   // or
273   // FUNC [m] address size param_size name
274 
275   Token Tok = Size ? Token::Func : Token::Public;
276 
277   if (consume<Token>(Line) != Tok)
278     return false;
279 
280   llvm::StringRef Str;
281   std::tie(Str, Line) = getToken(Line);
282   Multiple = Str == "m";
283 
284   if (Multiple)
285     std::tie(Str, Line) = getToken(Line);
286   if (!to_integer(Str, Address, 16))
287     return false;
288 
289   if (Tok == Token::Func) {
290     std::tie(Str, Line) = getToken(Line);
291     if (!to_integer(Str, *Size, 16))
292       return false;
293   }
294 
295   std::tie(Str, Line) = getToken(Line);
296   if (!to_integer(Str, ParamSize, 16))
297     return false;
298 
299   Name = Line.trim();
300   if (Name.empty())
301     return false;
302 
303   return true;
304 }
305 
306 std::optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
307   bool Multiple;
308   lldb::addr_t Address, Size, ParamSize;
309   llvm::StringRef Name;
310 
311   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
312     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
313 
314   return std::nullopt;
315 }
316 
317 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
318   return L.Multiple == R.Multiple && L.Address == R.Address &&
319          L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
320 }
321 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
322                                         const FuncRecord &R) {
323   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
324                              R.Multiple ? "m " : "", R.Address, R.Size,
325                              R.ParamSize, R.Name);
326 }
327 
328 std::optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) {
329   // INLINE inline_nest_level call_site_line call_site_file_num origin_num
330   // [address size]+
331   if (consume<Token>(Line) != Token::Inline)
332     return std::nullopt;
333 
334   llvm::SmallVector<llvm::StringRef> Tokens;
335   SplitString(Line, Tokens, " ");
336   if (Tokens.size() < 6 || Tokens.size() % 2 == 1)
337     return std::nullopt;
338 
339   size_t InlineNestLevel;
340   uint32_t CallSiteLineNum;
341   size_t CallSiteFileNum;
342   size_t OriginNum;
343   if (!(to_integer(Tokens[0], InlineNestLevel) &&
344         to_integer(Tokens[1], CallSiteLineNum) &&
345         to_integer(Tokens[2], CallSiteFileNum) &&
346         to_integer(Tokens[3], OriginNum)))
347     return std::nullopt;
348 
349   InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum,
350                                      CallSiteFileNum, OriginNum);
351   for (size_t i = 4; i < Tokens.size(); i += 2) {
352     lldb::addr_t Address;
353     if (!to_integer(Tokens[i], Address, 16))
354       return std::nullopt;
355     lldb::addr_t Size;
356     if (!to_integer(Tokens[i + 1].trim(), Size, 16))
357       return std::nullopt;
358     Record.Ranges.emplace_back(Address, Size);
359   }
360   return Record;
361 }
362 
363 bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) {
364   return L.InlineNestLevel == R.InlineNestLevel &&
365          L.CallSiteLineNum == R.CallSiteLineNum &&
366          L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum &&
367          L.Ranges == R.Ranges;
368 }
369 
370 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
371                                         const InlineRecord &R) {
372   OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel,
373                       R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum);
374   for (const auto &range : R.Ranges) {
375     OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second);
376   }
377   return OS;
378 }
379 
380 std::optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
381   lldb::addr_t Address;
382   llvm::StringRef Str;
383   std::tie(Str, Line) = getToken(Line);
384   if (!to_integer(Str, Address, 16))
385     return std::nullopt;
386 
387   lldb::addr_t Size;
388   std::tie(Str, Line) = getToken(Line);
389   if (!to_integer(Str, Size, 16))
390     return std::nullopt;
391 
392   uint32_t LineNum;
393   std::tie(Str, Line) = getToken(Line);
394   if (!to_integer(Str, LineNum))
395     return std::nullopt;
396 
397   size_t FileNum;
398   std::tie(Str, Line) = getToken(Line);
399   if (!to_integer(Str, FileNum))
400     return std::nullopt;
401 
402   return LineRecord(Address, Size, LineNum, FileNum);
403 }
404 
405 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
406   return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
407          L.FileNum == R.FileNum;
408 }
409 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
410                                         const LineRecord &R) {
411   return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
412                              R.LineNum, R.FileNum);
413 }
414 
415 std::optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
416   bool Multiple;
417   lldb::addr_t Address, ParamSize;
418   llvm::StringRef Name;
419 
420   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
421     return PublicRecord(Multiple, Address, ParamSize, Name);
422 
423   return std::nullopt;
424 }
425 
426 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
427   return L.Multiple == R.Multiple && L.Address == R.Address &&
428          L.ParamSize == R.ParamSize && L.Name == R.Name;
429 }
430 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
431                                         const PublicRecord &R) {
432   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
433                              R.Multiple ? "m " : "", R.Address, R.ParamSize,
434                              R.Name);
435 }
436 
437 std::optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
438   // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
439   // or
440   // STACK CFI address reg1: expr1 reg2: expr2 ...
441   // No token in exprN ends with a colon.
442 
443   if (consume<Token>(Line) != Token::Stack)
444     return std::nullopt;
445   if (consume<Token>(Line) != Token::CFI)
446     return std::nullopt;
447 
448   llvm::StringRef Str;
449   std::tie(Str, Line) = getToken(Line);
450 
451   bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
452   if (IsInitRecord)
453     std::tie(Str, Line) = getToken(Line);
454 
455   lldb::addr_t Address;
456   if (!to_integer(Str, Address, 16))
457     return std::nullopt;
458 
459   std::optional<lldb::addr_t> Size;
460   if (IsInitRecord) {
461     Size.emplace();
462     std::tie(Str, Line) = getToken(Line);
463     if (!to_integer(Str, *Size, 16))
464       return std::nullopt;
465   }
466 
467   return StackCFIRecord(Address, Size, Line.trim());
468 }
469 
470 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
471   return L.Address == R.Address && L.Size == R.Size &&
472          L.UnwindRules == R.UnwindRules;
473 }
474 
475 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
476                                         const StackCFIRecord &R) {
477   OS << "STACK CFI ";
478   if (R.Size)
479     OS << "INIT ";
480   OS << llvm::formatv("{0:x-} ", R.Address);
481   if (R.Size)
482     OS << llvm::formatv("{0:x-} ", *R.Size);
483   return OS << " " << R.UnwindRules;
484 }
485 
486 std::optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
487   // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
488   //     saved_register_size local_size max_stack_size has_program_string
489   //     program_string_OR_allocates_base_pointer
490 
491   if (consume<Token>(Line) != Token::Stack)
492     return std::nullopt;
493   if (consume<Token>(Line) != Token::Win)
494     return std::nullopt;
495 
496   llvm::StringRef Str;
497   uint8_t Type;
498   std::tie(Str, Line) = getToken(Line);
499   // Right now we only support the "FrameData" frame type.
500   if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData)
501     return std::nullopt;
502 
503   lldb::addr_t RVA;
504   std::tie(Str, Line) = getToken(Line);
505   if (!to_integer(Str, RVA, 16))
506     return std::nullopt;
507 
508   lldb::addr_t CodeSize;
509   std::tie(Str, Line) = getToken(Line);
510   if (!to_integer(Str, CodeSize, 16))
511     return std::nullopt;
512 
513   // Skip fields which we aren't using right now.
514   std::tie(Str, Line) = getToken(Line); // prologue_size
515   std::tie(Str, Line) = getToken(Line); // epilogue_size
516 
517   lldb::addr_t ParameterSize;
518   std::tie(Str, Line) = getToken(Line);
519   if (!to_integer(Str, ParameterSize, 16))
520     return std::nullopt;
521 
522   lldb::addr_t SavedRegisterSize;
523   std::tie(Str, Line) = getToken(Line);
524   if (!to_integer(Str, SavedRegisterSize, 16))
525     return std::nullopt;
526 
527   lldb::addr_t LocalSize;
528   std::tie(Str, Line) = getToken(Line);
529   if (!to_integer(Str, LocalSize, 16))
530     return std::nullopt;
531 
532   std::tie(Str, Line) = getToken(Line); // max_stack_size
533 
534   uint8_t HasProgramString;
535   std::tie(Str, Line) = getToken(Line);
536   if (!to_integer(Str, HasProgramString))
537     return std::nullopt;
538   // FrameData records should always have a program string.
539   if (!HasProgramString)
540     return std::nullopt;
541 
542   return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize,
543                         LocalSize, Line.trim());
544 }
545 
546 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) {
547   return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
548          L.ParameterSize == R.ParameterSize &&
549          L.SavedRegisterSize == R.SavedRegisterSize &&
550          L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString;
551 }
552 
553 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
554                                         const StackWinRecord &R) {
555   return OS << llvm::formatv(
556              "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA,
557              R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize,
558              R.ProgramString);
559 }
560 
561 llvm::StringRef breakpad::toString(Record::Kind K) {
562   switch (K) {
563   case Record::Module:
564     return "MODULE";
565   case Record::Info:
566     return "INFO";
567   case Record::File:
568     return "FILE";
569   case Record::Func:
570     return "FUNC";
571   case Record::Inline:
572     return "INLINE";
573   case Record::InlineOrigin:
574     return "INLINE_ORIGIN";
575   case Record::Line:
576     return "LINE";
577   case Record::Public:
578     return "PUBLIC";
579   case Record::StackCFI:
580     return "STACK CFI";
581   case Record::StackWin:
582     return "STACK WIN";
583   }
584   llvm_unreachable("Unknown record kind!");
585 }
586