1 //===-- BreakpadRecords.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" 10 #include "llvm/ADT/StringExtras.h" 11 #include "llvm/ADT/StringSwitch.h" 12 #include "llvm/Support/Endian.h" 13 #include "llvm/Support/FormatVariadic.h" 14 #include <optional> 15 16 using namespace lldb_private; 17 using namespace lldb_private::breakpad; 18 19 namespace { 20 enum class Token { 21 Unknown, 22 Module, 23 Info, 24 CodeID, 25 File, 26 Func, 27 Inline, 28 InlineOrigin, 29 Public, 30 Stack, 31 CFI, 32 Init, 33 Win, 34 }; 35 } 36 37 template<typename T> 38 static T stringTo(llvm::StringRef Str); 39 40 template <> Token stringTo<Token>(llvm::StringRef Str) { 41 return llvm::StringSwitch<Token>(Str) 42 .Case("MODULE", Token::Module) 43 .Case("INFO", Token::Info) 44 .Case("CODE_ID", Token::CodeID) 45 .Case("FILE", Token::File) 46 .Case("FUNC", Token::Func) 47 .Case("INLINE", Token::Inline) 48 .Case("INLINE_ORIGIN", Token::InlineOrigin) 49 .Case("PUBLIC", Token::Public) 50 .Case("STACK", Token::Stack) 51 .Case("CFI", Token::CFI) 52 .Case("INIT", Token::Init) 53 .Case("WIN", Token::Win) 54 .Default(Token::Unknown); 55 } 56 57 template <> 58 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) { 59 using llvm::Triple; 60 return llvm::StringSwitch<Triple::OSType>(Str) 61 .Case("Linux", Triple::Linux) 62 .Case("mac", Triple::MacOSX) 63 .Case("windows", Triple::Win32) 64 .Default(Triple::UnknownOS); 65 } 66 67 template <> 68 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) { 69 using llvm::Triple; 70 return llvm::StringSwitch<Triple::ArchType>(Str) 71 .Case("arm", Triple::arm) 72 .Cases("arm64", "arm64e", Triple::aarch64) 73 .Case("mips", Triple::mips) 74 .Case("msp430", Triple::msp430) 75 .Case("ppc", Triple::ppc) 76 .Case("ppc64", Triple::ppc64) 77 .Case("s390", Triple::systemz) 78 .Case("sparc", Triple::sparc) 79 .Case("sparcv9", Triple::sparcv9) 80 .Case("x86", Triple::x86) 81 .Cases("x86_64", "x86_64h", Triple::x86_64) 82 .Default(Triple::UnknownArch); 83 } 84 85 template<typename T> 86 static T consume(llvm::StringRef &Str) { 87 llvm::StringRef Token; 88 std::tie(Token, Str) = getToken(Str); 89 return stringTo<T>(Token); 90 } 91 92 /// Return the number of hex digits needed to encode an (POD) object of a given 93 /// type. 94 template <typename T> static constexpr size_t hex_digits() { 95 return 2 * sizeof(T); 96 } 97 98 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { 99 struct data_t { 100 using uuid_t = uint8_t[16]; 101 uuid_t uuid; 102 llvm::support::ubig32_t age; 103 } data; 104 static_assert(sizeof(data) == 20); 105 // The textual module id encoding should be between 33 and 40 bytes long, 106 // depending on the size of the age field, which is of variable length. 107 // The first three chunks of the id are encoded in big endian, so we need to 108 // byte-swap those. 109 if (str.size() <= hex_digits<data_t::uuid_t>() || 110 str.size() > hex_digits<data_t>()) 111 return UUID(); 112 if (!all_of(str, llvm::isHexDigit)) 113 return UUID(); 114 115 llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>()); 116 llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>()); 117 118 llvm::copy(fromHex(uuid_str), data.uuid); 119 uint32_t age; 120 bool success = to_integer(age_str, age, 16); 121 assert(success); 122 (void)success; 123 data.age = age; 124 125 // On non-windows, the age field should always be zero, so we don't include to 126 // match the native uuid format of these platforms. 127 return UUID(&data, os == llvm::Triple::Win32 ? sizeof(data) 128 : sizeof(data.uuid)); 129 } 130 131 std::optional<Record::Kind> Record::classify(llvm::StringRef Line) { 132 Token Tok = consume<Token>(Line); 133 switch (Tok) { 134 case Token::Module: 135 return Record::Module; 136 case Token::Info: 137 return Record::Info; 138 case Token::File: 139 return Record::File; 140 case Token::Func: 141 return Record::Func; 142 case Token::Public: 143 return Record::Public; 144 case Token::Stack: 145 Tok = consume<Token>(Line); 146 switch (Tok) { 147 case Token::CFI: 148 return Record::StackCFI; 149 case Token::Win: 150 return Record::StackWin; 151 default: 152 return std::nullopt; 153 } 154 case Token::Inline: 155 return Record::Inline; 156 case Token::InlineOrigin: 157 return Record::InlineOrigin; 158 case Token::Unknown: 159 // Optimistically assume that any unrecognised token means this is a line 160 // record, those don't have a special keyword and start directly with a 161 // hex number. 162 return Record::Line; 163 164 case Token::CodeID: 165 case Token::CFI: 166 case Token::Init: 167 case Token::Win: 168 // These should never appear at the start of a valid record. 169 return std::nullopt; 170 } 171 llvm_unreachable("Fully covered switch above!"); 172 } 173 174 std::optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { 175 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out 176 if (consume<Token>(Line) != Token::Module) 177 return std::nullopt; 178 179 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line); 180 if (OS == llvm::Triple::UnknownOS) 181 return std::nullopt; 182 183 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line); 184 if (Arch == llvm::Triple::UnknownArch) 185 return std::nullopt; 186 187 llvm::StringRef Str; 188 std::tie(Str, Line) = getToken(Line); 189 UUID ID = parseModuleId(OS, Str); 190 if (!ID) 191 return std::nullopt; 192 193 return ModuleRecord(OS, Arch, std::move(ID)); 194 } 195 196 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 197 const ModuleRecord &R) { 198 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " 199 << llvm::Triple::getArchTypeName(R.Arch) << " " 200 << R.ID.GetAsString(); 201 } 202 203 std::optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { 204 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] 205 if (consume<Token>(Line) != Token::Info) 206 return std::nullopt; 207 208 if (consume<Token>(Line) != Token::CodeID) 209 return std::nullopt; 210 211 llvm::StringRef Str; 212 std::tie(Str, Line) = getToken(Line); 213 // If we don't have any text following the code ID (e.g. on linux), we should 214 // use this as the UUID. Otherwise, we should revert back to the module ID. 215 UUID ID; 216 if (Line.trim().empty()) { 217 if (Str.empty() || !ID.SetFromStringRef(Str)) 218 return std::nullopt; 219 } 220 return InfoRecord(std::move(ID)); 221 } 222 223 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 224 const InfoRecord &R) { 225 return OS << "INFO CODE_ID " << R.ID.GetAsString(); 226 } 227 228 template <typename T> 229 static std::optional<T> parseNumberName(llvm::StringRef Line, Token TokenType) { 230 // TOKEN number name 231 if (consume<Token>(Line) != TokenType) 232 return std::nullopt; 233 234 llvm::StringRef Str; 235 size_t Number; 236 std::tie(Str, Line) = getToken(Line); 237 if (!to_integer(Str, Number)) 238 return std::nullopt; 239 240 llvm::StringRef Name = Line.trim(); 241 if (Name.empty()) 242 return std::nullopt; 243 244 return T(Number, Name); 245 } 246 247 std::optional<FileRecord> FileRecord::parse(llvm::StringRef Line) { 248 // FILE number name 249 return parseNumberName<FileRecord>(Line, Token::File); 250 } 251 252 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 253 const FileRecord &R) { 254 return OS << "FILE " << R.Number << " " << R.Name; 255 } 256 257 std::optional<InlineOriginRecord> 258 InlineOriginRecord::parse(llvm::StringRef Line) { 259 // INLINE_ORIGIN number name 260 return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin); 261 } 262 263 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 264 const InlineOriginRecord &R) { 265 return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name; 266 } 267 268 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, 269 lldb::addr_t &Address, lldb::addr_t *Size, 270 lldb::addr_t &ParamSize, llvm::StringRef &Name) { 271 // PUBLIC [m] address param_size name 272 // or 273 // FUNC [m] address size param_size name 274 275 Token Tok = Size ? Token::Func : Token::Public; 276 277 if (consume<Token>(Line) != Tok) 278 return false; 279 280 llvm::StringRef Str; 281 std::tie(Str, Line) = getToken(Line); 282 Multiple = Str == "m"; 283 284 if (Multiple) 285 std::tie(Str, Line) = getToken(Line); 286 if (!to_integer(Str, Address, 16)) 287 return false; 288 289 if (Tok == Token::Func) { 290 std::tie(Str, Line) = getToken(Line); 291 if (!to_integer(Str, *Size, 16)) 292 return false; 293 } 294 295 std::tie(Str, Line) = getToken(Line); 296 if (!to_integer(Str, ParamSize, 16)) 297 return false; 298 299 Name = Line.trim(); 300 if (Name.empty()) 301 return false; 302 303 return true; 304 } 305 306 std::optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { 307 bool Multiple; 308 lldb::addr_t Address, Size, ParamSize; 309 llvm::StringRef Name; 310 311 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) 312 return FuncRecord(Multiple, Address, Size, ParamSize, Name); 313 314 return std::nullopt; 315 } 316 317 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { 318 return L.Multiple == R.Multiple && L.Address == R.Address && 319 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; 320 } 321 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 322 const FuncRecord &R) { 323 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", 324 R.Multiple ? "m " : "", R.Address, R.Size, 325 R.ParamSize, R.Name); 326 } 327 328 std::optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) { 329 // INLINE inline_nest_level call_site_line call_site_file_num origin_num 330 // [address size]+ 331 if (consume<Token>(Line) != Token::Inline) 332 return std::nullopt; 333 334 llvm::SmallVector<llvm::StringRef> Tokens; 335 SplitString(Line, Tokens, " "); 336 if (Tokens.size() < 6 || Tokens.size() % 2 == 1) 337 return std::nullopt; 338 339 size_t InlineNestLevel; 340 uint32_t CallSiteLineNum; 341 size_t CallSiteFileNum; 342 size_t OriginNum; 343 if (!(to_integer(Tokens[0], InlineNestLevel) && 344 to_integer(Tokens[1], CallSiteLineNum) && 345 to_integer(Tokens[2], CallSiteFileNum) && 346 to_integer(Tokens[3], OriginNum))) 347 return std::nullopt; 348 349 InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum, 350 CallSiteFileNum, OriginNum); 351 for (size_t i = 4; i < Tokens.size(); i += 2) { 352 lldb::addr_t Address; 353 if (!to_integer(Tokens[i], Address, 16)) 354 return std::nullopt; 355 lldb::addr_t Size; 356 if (!to_integer(Tokens[i + 1].trim(), Size, 16)) 357 return std::nullopt; 358 Record.Ranges.emplace_back(Address, Size); 359 } 360 return Record; 361 } 362 363 bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) { 364 return L.InlineNestLevel == R.InlineNestLevel && 365 L.CallSiteLineNum == R.CallSiteLineNum && 366 L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum && 367 L.Ranges == R.Ranges; 368 } 369 370 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 371 const InlineRecord &R) { 372 OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel, 373 R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum); 374 for (const auto &range : R.Ranges) { 375 OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second); 376 } 377 return OS; 378 } 379 380 std::optional<LineRecord> LineRecord::parse(llvm::StringRef Line) { 381 lldb::addr_t Address; 382 llvm::StringRef Str; 383 std::tie(Str, Line) = getToken(Line); 384 if (!to_integer(Str, Address, 16)) 385 return std::nullopt; 386 387 lldb::addr_t Size; 388 std::tie(Str, Line) = getToken(Line); 389 if (!to_integer(Str, Size, 16)) 390 return std::nullopt; 391 392 uint32_t LineNum; 393 std::tie(Str, Line) = getToken(Line); 394 if (!to_integer(Str, LineNum)) 395 return std::nullopt; 396 397 size_t FileNum; 398 std::tie(Str, Line) = getToken(Line); 399 if (!to_integer(Str, FileNum)) 400 return std::nullopt; 401 402 return LineRecord(Address, Size, LineNum, FileNum); 403 } 404 405 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) { 406 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum && 407 L.FileNum == R.FileNum; 408 } 409 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 410 const LineRecord &R) { 411 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size, 412 R.LineNum, R.FileNum); 413 } 414 415 std::optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { 416 bool Multiple; 417 lldb::addr_t Address, ParamSize; 418 llvm::StringRef Name; 419 420 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) 421 return PublicRecord(Multiple, Address, ParamSize, Name); 422 423 return std::nullopt; 424 } 425 426 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { 427 return L.Multiple == R.Multiple && L.Address == R.Address && 428 L.ParamSize == R.ParamSize && L.Name == R.Name; 429 } 430 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 431 const PublicRecord &R) { 432 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", 433 R.Multiple ? "m " : "", R.Address, R.ParamSize, 434 R.Name); 435 } 436 437 std::optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) { 438 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ... 439 // or 440 // STACK CFI address reg1: expr1 reg2: expr2 ... 441 // No token in exprN ends with a colon. 442 443 if (consume<Token>(Line) != Token::Stack) 444 return std::nullopt; 445 if (consume<Token>(Line) != Token::CFI) 446 return std::nullopt; 447 448 llvm::StringRef Str; 449 std::tie(Str, Line) = getToken(Line); 450 451 bool IsInitRecord = stringTo<Token>(Str) == Token::Init; 452 if (IsInitRecord) 453 std::tie(Str, Line) = getToken(Line); 454 455 lldb::addr_t Address; 456 if (!to_integer(Str, Address, 16)) 457 return std::nullopt; 458 459 std::optional<lldb::addr_t> Size; 460 if (IsInitRecord) { 461 Size.emplace(); 462 std::tie(Str, Line) = getToken(Line); 463 if (!to_integer(Str, *Size, 16)) 464 return std::nullopt; 465 } 466 467 return StackCFIRecord(Address, Size, Line.trim()); 468 } 469 470 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) { 471 return L.Address == R.Address && L.Size == R.Size && 472 L.UnwindRules == R.UnwindRules; 473 } 474 475 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 476 const StackCFIRecord &R) { 477 OS << "STACK CFI "; 478 if (R.Size) 479 OS << "INIT "; 480 OS << llvm::formatv("{0:x-} ", R.Address); 481 if (R.Size) 482 OS << llvm::formatv("{0:x-} ", *R.Size); 483 return OS << " " << R.UnwindRules; 484 } 485 486 std::optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) { 487 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size 488 // saved_register_size local_size max_stack_size has_program_string 489 // program_string_OR_allocates_base_pointer 490 491 if (consume<Token>(Line) != Token::Stack) 492 return std::nullopt; 493 if (consume<Token>(Line) != Token::Win) 494 return std::nullopt; 495 496 llvm::StringRef Str; 497 uint8_t Type; 498 std::tie(Str, Line) = getToken(Line); 499 // Right now we only support the "FrameData" frame type. 500 if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData) 501 return std::nullopt; 502 503 lldb::addr_t RVA; 504 std::tie(Str, Line) = getToken(Line); 505 if (!to_integer(Str, RVA, 16)) 506 return std::nullopt; 507 508 lldb::addr_t CodeSize; 509 std::tie(Str, Line) = getToken(Line); 510 if (!to_integer(Str, CodeSize, 16)) 511 return std::nullopt; 512 513 // Skip fields which we aren't using right now. 514 std::tie(Str, Line) = getToken(Line); // prologue_size 515 std::tie(Str, Line) = getToken(Line); // epilogue_size 516 517 lldb::addr_t ParameterSize; 518 std::tie(Str, Line) = getToken(Line); 519 if (!to_integer(Str, ParameterSize, 16)) 520 return std::nullopt; 521 522 lldb::addr_t SavedRegisterSize; 523 std::tie(Str, Line) = getToken(Line); 524 if (!to_integer(Str, SavedRegisterSize, 16)) 525 return std::nullopt; 526 527 lldb::addr_t LocalSize; 528 std::tie(Str, Line) = getToken(Line); 529 if (!to_integer(Str, LocalSize, 16)) 530 return std::nullopt; 531 532 std::tie(Str, Line) = getToken(Line); // max_stack_size 533 534 uint8_t HasProgramString; 535 std::tie(Str, Line) = getToken(Line); 536 if (!to_integer(Str, HasProgramString)) 537 return std::nullopt; 538 // FrameData records should always have a program string. 539 if (!HasProgramString) 540 return std::nullopt; 541 542 return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize, 543 LocalSize, Line.trim()); 544 } 545 546 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) { 547 return L.RVA == R.RVA && L.CodeSize == R.CodeSize && 548 L.ParameterSize == R.ParameterSize && 549 L.SavedRegisterSize == R.SavedRegisterSize && 550 L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString; 551 } 552 553 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 554 const StackWinRecord &R) { 555 return OS << llvm::formatv( 556 "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA, 557 R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize, 558 R.ProgramString); 559 } 560 561 llvm::StringRef breakpad::toString(Record::Kind K) { 562 switch (K) { 563 case Record::Module: 564 return "MODULE"; 565 case Record::Info: 566 return "INFO"; 567 case Record::File: 568 return "FILE"; 569 case Record::Func: 570 return "FUNC"; 571 case Record::Inline: 572 return "INLINE"; 573 case Record::InlineOrigin: 574 return "INLINE_ORIGIN"; 575 case Record::Line: 576 return "LINE"; 577 case Record::Public: 578 return "PUBLIC"; 579 case Record::StackCFI: 580 return "STACK CFI"; 581 case Record::StackWin: 582 return "STACK WIN"; 583 } 584 llvm_unreachable("Unknown record kind!"); 585 } 586