1 //===-- BreakpadRecords.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
/// Keywords recognised while tokenizing a breakpad record. Text that matches
/// none of the known keywords is represented by Unknown.
enum class Token {
  Unknown,

  // Keywords which may open a record.
  Module,
  Info,
  CodeID, // only valid after INFO
  File,
  Func,
  Inline,
  InlineOrigin,
  Public,
  Stack,

  // Keywords which only appear after STACK (INIT only after STACK CFI).
  CFI,
  Init,
  Win,
};
} // namespace
35 
36 template<typename T>
37 static T stringTo(llvm::StringRef Str);
38 
39 template <> Token stringTo<Token>(llvm::StringRef Str) {
40   return llvm::StringSwitch<Token>(Str)
41       .Case("MODULE", Token::Module)
42       .Case("INFO", Token::Info)
43       .Case("CODE_ID", Token::CodeID)
44       .Case("FILE", Token::File)
45       .Case("FUNC", Token::Func)
46       .Case("INLINE", Token::Inline)
47       .Case("INLINE_ORIGIN", Token::InlineOrigin)
48       .Case("PUBLIC", Token::Public)
49       .Case("STACK", Token::Stack)
50       .Case("CFI", Token::CFI)
51       .Case("INIT", Token::Init)
52       .Case("WIN", Token::Win)
53       .Default(Token::Unknown);
54 }
55 
56 template <>
57 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
58   using llvm::Triple;
59   return llvm::StringSwitch<Triple::OSType>(Str)
60       .Case("Linux", Triple::Linux)
61       .Case("mac", Triple::MacOSX)
62       .Case("windows", Triple::Win32)
63       .Default(Triple::UnknownOS);
64 }
65 
66 template <>
67 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
68   using llvm::Triple;
69   return llvm::StringSwitch<Triple::ArchType>(Str)
70       .Case("arm", Triple::arm)
71       .Cases("arm64", "arm64e", Triple::aarch64)
72       .Case("mips", Triple::mips)
73       .Case("ppc", Triple::ppc)
74       .Case("ppc64", Triple::ppc64)
75       .Case("s390", Triple::systemz)
76       .Case("sparc", Triple::sparc)
77       .Case("sparcv9", Triple::sparcv9)
78       .Case("x86", Triple::x86)
79       .Cases("x86_64", "x86_64h", Triple::x86_64)
80       .Default(Triple::UnknownArch);
81 }
82 
83 template<typename T>
84 static T consume(llvm::StringRef &Str) {
85   llvm::StringRef Token;
86   std::tie(Token, Str) = getToken(Str);
87   return stringTo<T>(Token);
88 }
89 
/// Number of hexadecimal digits required to spell out every byte of a (POD)
/// object of type T: each byte takes two digits.
template <typename T> static constexpr size_t hex_digits() {
  return sizeof(T) * 2;
}
95 
96 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
97   struct data_t {
98     using uuid_t = uint8_t[16];
99     uuid_t uuid;
100     llvm::support::ubig32_t age;
101   } data;
102   static_assert(sizeof(data) == 20, "");
103   // The textual module id encoding should be between 33 and 40 bytes long,
104   // depending on the size of the age field, which is of variable length.
105   // The first three chunks of the id are encoded in big endian, so we need to
106   // byte-swap those.
107   if (str.size() <= hex_digits<data_t::uuid_t>() ||
108       str.size() > hex_digits<data_t>())
109     return UUID();
110   if (!all_of(str, llvm::isHexDigit))
111     return UUID();
112 
113   llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
114   llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
115 
116   llvm::copy(fromHex(uuid_str), data.uuid);
117   uint32_t age;
118   bool success = to_integer(age_str, age, 16);
119   assert(success);
120   (void)success;
121   data.age = age;
122 
123   // On non-windows, the age field should always be zero, so we don't include to
124   // match the native uuid format of these platforms.
125   return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data)
126                                                          : sizeof(data.uuid));
127 }
128 
129 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
130   Token Tok = consume<Token>(Line);
131   switch (Tok) {
132   case Token::Module:
133     return Record::Module;
134   case Token::Info:
135     return Record::Info;
136   case Token::File:
137     return Record::File;
138   case Token::Func:
139     return Record::Func;
140   case Token::Public:
141     return Record::Public;
142   case Token::Stack:
143     Tok = consume<Token>(Line);
144     switch (Tok) {
145     case Token::CFI:
146       return Record::StackCFI;
147     case Token::Win:
148       return Record::StackWin;
149     default:
150       return llvm::None;
151     }
152   case Token::Inline:
153     return Record::Inline;
154   case Token::InlineOrigin:
155     return Record::InlineOrigin;
156   case Token::Unknown:
157     // Optimistically assume that any unrecognised token means this is a line
158     // record, those don't have a special keyword and start directly with a
159     // hex number.
160     return Record::Line;
161 
162   case Token::CodeID:
163   case Token::CFI:
164   case Token::Init:
165   case Token::Win:
166     // These should never appear at the start of a valid record.
167     return llvm::None;
168   }
169   llvm_unreachable("Fully covered switch above!");
170 }
171 
172 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
173   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
174   if (consume<Token>(Line) != Token::Module)
175     return llvm::None;
176 
177   llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
178   if (OS == llvm::Triple::UnknownOS)
179     return llvm::None;
180 
181   llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
182   if (Arch == llvm::Triple::UnknownArch)
183     return llvm::None;
184 
185   llvm::StringRef Str;
186   std::tie(Str, Line) = getToken(Line);
187   UUID ID = parseModuleId(OS, Str);
188   if (!ID)
189     return llvm::None;
190 
191   return ModuleRecord(OS, Arch, std::move(ID));
192 }
193 
194 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
195                                         const ModuleRecord &R) {
196   return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
197             << llvm::Triple::getArchTypeName(R.Arch) << " "
198             << R.ID.GetAsString();
199 }
200 
201 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
202   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
203   if (consume<Token>(Line) != Token::Info)
204     return llvm::None;
205 
206   if (consume<Token>(Line) != Token::CodeID)
207     return llvm::None;
208 
209   llvm::StringRef Str;
210   std::tie(Str, Line) = getToken(Line);
211   // If we don't have any text following the code ID (e.g. on linux), we should
212   // use this as the UUID. Otherwise, we should revert back to the module ID.
213   UUID ID;
214   if (Line.trim().empty()) {
215     if (Str.empty() || !ID.SetFromStringRef(Str))
216       return llvm::None;
217   }
218   return InfoRecord(std::move(ID));
219 }
220 
221 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
222                                         const InfoRecord &R) {
223   return OS << "INFO CODE_ID " << R.ID.GetAsString();
224 }
225 
226 template <typename T>
227 static llvm::Optional<T> parseNumberName(llvm::StringRef Line,
228                                          Token TokenType) {
229   // TOKEN number name
230   if (consume<Token>(Line) != TokenType)
231     return llvm::None;
232 
233   llvm::StringRef Str;
234   size_t Number;
235   std::tie(Str, Line) = getToken(Line);
236   if (!to_integer(Str, Number))
237     return llvm::None;
238 
239   llvm::StringRef Name = Line.trim();
240   if (Name.empty())
241     return llvm::None;
242 
243   return T(Number, Name);
244 }
245 
246 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
247   // FILE number name
248   return parseNumberName<FileRecord>(Line, Token::File);
249 }
250 
251 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
252                                         const FileRecord &R) {
253   return OS << "FILE " << R.Number << " " << R.Name;
254 }
255 
256 llvm::Optional<InlineOriginRecord>
257 InlineOriginRecord::parse(llvm::StringRef Line) {
258   // INLINE_ORIGIN number name
259   return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin);
260 }
261 
262 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
263                                         const InlineOriginRecord &R) {
264   return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name;
265 }
266 
267 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
268                               lldb::addr_t &Address, lldb::addr_t *Size,
269                               lldb::addr_t &ParamSize, llvm::StringRef &Name) {
270   // PUBLIC [m] address param_size name
271   // or
272   // FUNC [m] address size param_size name
273 
274   Token Tok = Size ? Token::Func : Token::Public;
275 
276   if (consume<Token>(Line) != Tok)
277     return false;
278 
279   llvm::StringRef Str;
280   std::tie(Str, Line) = getToken(Line);
281   Multiple = Str == "m";
282 
283   if (Multiple)
284     std::tie(Str, Line) = getToken(Line);
285   if (!to_integer(Str, Address, 16))
286     return false;
287 
288   if (Tok == Token::Func) {
289     std::tie(Str, Line) = getToken(Line);
290     if (!to_integer(Str, *Size, 16))
291       return false;
292   }
293 
294   std::tie(Str, Line) = getToken(Line);
295   if (!to_integer(Str, ParamSize, 16))
296     return false;
297 
298   Name = Line.trim();
299   if (Name.empty())
300     return false;
301 
302   return true;
303 }
304 
305 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
306   bool Multiple;
307   lldb::addr_t Address, Size, ParamSize;
308   llvm::StringRef Name;
309 
310   if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
311     return FuncRecord(Multiple, Address, Size, ParamSize, Name);
312 
313   return llvm::None;
314 }
315 
316 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
317   return L.Multiple == R.Multiple && L.Address == R.Address &&
318          L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
319 }
320 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
321                                         const FuncRecord &R) {
322   return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
323                              R.Multiple ? "m " : "", R.Address, R.Size,
324                              R.ParamSize, R.Name);
325 }
326 
327 llvm::Optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) {
328   // INLINE inline_nest_level call_site_line call_site_file_num origin_num
329   // [address size]+
330   if (consume<Token>(Line) != Token::Inline)
331     return llvm::None;
332 
333   llvm::SmallVector<llvm::StringRef> Tokens;
334   SplitString(Line, Tokens, " ");
335   if (Tokens.size() < 6 || Tokens.size() % 2 == 1)
336     return llvm::None;
337 
338   size_t InlineNestLevel;
339   uint32_t CallSiteLineNum;
340   size_t CallSiteFileNum;
341   size_t OriginNum;
342   if (!(to_integer(Tokens[0], InlineNestLevel) &&
343         to_integer(Tokens[1], CallSiteLineNum) &&
344         to_integer(Tokens[2], CallSiteFileNum) &&
345         to_integer(Tokens[3], OriginNum)))
346     return llvm::None;
347 
348   InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum,
349                                      CallSiteFileNum, OriginNum);
350   for (size_t i = 4; i < Tokens.size(); i += 2) {
351     lldb::addr_t Address;
352     if (!to_integer(Tokens[i], Address, 16))
353       return llvm::None;
354     lldb::addr_t Size;
355     if (!to_integer(Tokens[i + 1].trim(), Size, 16))
356       return llvm::None;
357     Record.Ranges.emplace_back(Address, Size);
358   }
359   return Record;
360 }
361 
362 bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) {
363   return L.InlineNestLevel == R.InlineNestLevel &&
364          L.CallSiteLineNum == R.CallSiteLineNum &&
365          L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum &&
366          L.Ranges == R.Ranges;
367 }
368 
369 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
370                                         const InlineRecord &R) {
371   OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel,
372                       R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum);
373   for (const auto &range : R.Ranges) {
374     OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second);
375   }
376   return OS;
377 }
378 
379 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
380   lldb::addr_t Address;
381   llvm::StringRef Str;
382   std::tie(Str, Line) = getToken(Line);
383   if (!to_integer(Str, Address, 16))
384     return llvm::None;
385 
386   lldb::addr_t Size;
387   std::tie(Str, Line) = getToken(Line);
388   if (!to_integer(Str, Size, 16))
389     return llvm::None;
390 
391   uint32_t LineNum;
392   std::tie(Str, Line) = getToken(Line);
393   if (!to_integer(Str, LineNum))
394     return llvm::None;
395 
396   size_t FileNum;
397   std::tie(Str, Line) = getToken(Line);
398   if (!to_integer(Str, FileNum))
399     return llvm::None;
400 
401   return LineRecord(Address, Size, LineNum, FileNum);
402 }
403 
404 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
405   return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
406          L.FileNum == R.FileNum;
407 }
408 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
409                                         const LineRecord &R) {
410   return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
411                              R.LineNum, R.FileNum);
412 }
413 
414 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
415   bool Multiple;
416   lldb::addr_t Address, ParamSize;
417   llvm::StringRef Name;
418 
419   if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
420     return PublicRecord(Multiple, Address, ParamSize, Name);
421 
422   return llvm::None;
423 }
424 
425 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) {
426   return L.Multiple == R.Multiple && L.Address == R.Address &&
427          L.ParamSize == R.ParamSize && L.Name == R.Name;
428 }
429 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
430                                         const PublicRecord &R) {
431   return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
432                              R.Multiple ? "m " : "", R.Address, R.ParamSize,
433                              R.Name);
434 }
435 
436 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
437   // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
438   // or
439   // STACK CFI address reg1: expr1 reg2: expr2 ...
440   // No token in exprN ends with a colon.
441 
442   if (consume<Token>(Line) != Token::Stack)
443     return llvm::None;
444   if (consume<Token>(Line) != Token::CFI)
445     return llvm::None;
446 
447   llvm::StringRef Str;
448   std::tie(Str, Line) = getToken(Line);
449 
450   bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
451   if (IsInitRecord)
452     std::tie(Str, Line) = getToken(Line);
453 
454   lldb::addr_t Address;
455   if (!to_integer(Str, Address, 16))
456     return llvm::None;
457 
458   llvm::Optional<lldb::addr_t> Size;
459   if (IsInitRecord) {
460     Size.emplace();
461     std::tie(Str, Line) = getToken(Line);
462     if (!to_integer(Str, *Size, 16))
463       return llvm::None;
464   }
465 
466   return StackCFIRecord(Address, Size, Line.trim());
467 }
468 
469 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) {
470   return L.Address == R.Address && L.Size == R.Size &&
471          L.UnwindRules == R.UnwindRules;
472 }
473 
474 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
475                                         const StackCFIRecord &R) {
476   OS << "STACK CFI ";
477   if (R.Size)
478     OS << "INIT ";
479   OS << llvm::formatv("{0:x-} ", R.Address);
480   if (R.Size)
481     OS << llvm::formatv("{0:x-} ", *R.Size);
482   return OS << " " << R.UnwindRules;
483 }
484 
485 llvm::Optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
486   // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
487   //     saved_register_size local_size max_stack_size has_program_string
488   //     program_string_OR_allocates_base_pointer
489 
490   if (consume<Token>(Line) != Token::Stack)
491     return llvm::None;
492   if (consume<Token>(Line) != Token::Win)
493     return llvm::None;
494 
495   llvm::StringRef Str;
496   uint8_t Type;
497   std::tie(Str, Line) = getToken(Line);
498   // Right now we only support the "FrameData" frame type.
499   if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData)
500     return llvm::None;
501 
502   lldb::addr_t RVA;
503   std::tie(Str, Line) = getToken(Line);
504   if (!to_integer(Str, RVA, 16))
505     return llvm::None;
506 
507   lldb::addr_t CodeSize;
508   std::tie(Str, Line) = getToken(Line);
509   if (!to_integer(Str, CodeSize, 16))
510     return llvm::None;
511 
512   // Skip fields which we aren't using right now.
513   std::tie(Str, Line) = getToken(Line); // prologue_size
514   std::tie(Str, Line) = getToken(Line); // epilogue_size
515 
516   lldb::addr_t ParameterSize;
517   std::tie(Str, Line) = getToken(Line);
518   if (!to_integer(Str, ParameterSize, 16))
519     return llvm::None;
520 
521   lldb::addr_t SavedRegisterSize;
522   std::tie(Str, Line) = getToken(Line);
523   if (!to_integer(Str, SavedRegisterSize, 16))
524     return llvm::None;
525 
526   lldb::addr_t LocalSize;
527   std::tie(Str, Line) = getToken(Line);
528   if (!to_integer(Str, LocalSize, 16))
529     return llvm::None;
530 
531   std::tie(Str, Line) = getToken(Line); // max_stack_size
532 
533   uint8_t HasProgramString;
534   std::tie(Str, Line) = getToken(Line);
535   if (!to_integer(Str, HasProgramString))
536     return llvm::None;
537   // FrameData records should always have a program string.
538   if (!HasProgramString)
539     return llvm::None;
540 
541   return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize,
542                         LocalSize, Line.trim());
543 }
544 
545 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) {
546   return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
547          L.ParameterSize == R.ParameterSize &&
548          L.SavedRegisterSize == R.SavedRegisterSize &&
549          L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString;
550 }
551 
552 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
553                                         const StackWinRecord &R) {
554   return OS << llvm::formatv(
555              "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA,
556              R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize,
557              R.ProgramString);
558 }
559 
560 llvm::StringRef breakpad::toString(Record::Kind K) {
561   switch (K) {
562   case Record::Module:
563     return "MODULE";
564   case Record::Info:
565     return "INFO";
566   case Record::File:
567     return "FILE";
568   case Record::Func:
569     return "FUNC";
570   case Record::Inline:
571     return "INLINE";
572   case Record::InlineOrigin:
573     return "INLINE_ORIGIN";
574   case Record::Line:
575     return "LINE";
576   case Record::Public:
577     return "PUBLIC";
578   case Record::StackCFI:
579     return "STACK CFI";
580   case Record::StackWin:
581     return "STACK WIN";
582   }
583   llvm_unreachable("Unknown record kind!");
584 }
585