1 //===-- BreakpadRecords.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" 10 #include "llvm/ADT/StringExtras.h" 11 #include "llvm/ADT/StringSwitch.h" 12 #include "llvm/Support/Endian.h" 13 #include "llvm/Support/FormatVariadic.h" 14 15 using namespace lldb_private; 16 using namespace lldb_private::breakpad; 17 18 namespace { 19 enum class Token { 20 Unknown, 21 Module, 22 Info, 23 CodeID, 24 File, 25 Func, 26 Inline, 27 InlineOrigin, 28 Public, 29 Stack, 30 CFI, 31 Init, 32 Win, 33 }; 34 } 35 36 template<typename T> 37 static T stringTo(llvm::StringRef Str); 38 39 template <> Token stringTo<Token>(llvm::StringRef Str) { 40 return llvm::StringSwitch<Token>(Str) 41 .Case("MODULE", Token::Module) 42 .Case("INFO", Token::Info) 43 .Case("CODE_ID", Token::CodeID) 44 .Case("FILE", Token::File) 45 .Case("FUNC", Token::Func) 46 .Case("INLINE", Token::Inline) 47 .Case("INLINE_ORIGIN", Token::InlineOrigin) 48 .Case("PUBLIC", Token::Public) 49 .Case("STACK", Token::Stack) 50 .Case("CFI", Token::CFI) 51 .Case("INIT", Token::Init) 52 .Case("WIN", Token::Win) 53 .Default(Token::Unknown); 54 } 55 56 template <> 57 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) { 58 using llvm::Triple; 59 return llvm::StringSwitch<Triple::OSType>(Str) 60 .Case("Linux", Triple::Linux) 61 .Case("mac", Triple::MacOSX) 62 .Case("windows", Triple::Win32) 63 .Default(Triple::UnknownOS); 64 } 65 66 template <> 67 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) { 68 using llvm::Triple; 69 return llvm::StringSwitch<Triple::ArchType>(Str) 70 .Case("arm", Triple::arm) 71 .Cases("arm64", "arm64e", Triple::aarch64) 72 .Case("mips", Triple::mips) 73 .Case("ppc", Triple::ppc) 74 .Case("ppc64", Triple::ppc64) 75 .Case("s390", Triple::systemz) 76 .Case("sparc", Triple::sparc) 77 .Case("sparcv9", Triple::sparcv9) 78 .Case("x86", Triple::x86) 79 .Cases("x86_64", "x86_64h", Triple::x86_64) 80 .Default(Triple::UnknownArch); 81 } 82 83 template<typename T> 84 static T consume(llvm::StringRef &Str) { 85 llvm::StringRef Token; 86 std::tie(Token, Str) = getToken(Str); 87 return stringTo<T>(Token); 88 } 89 90 /// Return the number of hex digits needed to encode an (POD) object of a given 91 /// type. 92 template <typename T> static constexpr size_t hex_digits() { 93 return 2 * sizeof(T); 94 } 95 96 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { 97 struct data_t { 98 using uuid_t = uint8_t[16]; 99 uuid_t uuid; 100 llvm::support::ubig32_t age; 101 } data; 102 static_assert(sizeof(data) == 20, ""); 103 // The textual module id encoding should be between 33 and 40 bytes long, 104 // depending on the size of the age field, which is of variable length. 105 // The first three chunks of the id are encoded in big endian, so we need to 106 // byte-swap those. 107 if (str.size() <= hex_digits<data_t::uuid_t>() || 108 str.size() > hex_digits<data_t>()) 109 return UUID(); 110 if (!all_of(str, llvm::isHexDigit)) 111 return UUID(); 112 113 llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>()); 114 llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>()); 115 116 llvm::copy(fromHex(uuid_str), data.uuid); 117 uint32_t age; 118 bool success = to_integer(age_str, age, 16); 119 assert(success); 120 (void)success; 121 data.age = age; 122 123 // On non-windows, the age field should always be zero, so we don't include to 124 // match the native uuid format of these platforms. 125 return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data) 126 : sizeof(data.uuid)); 127 } 128 129 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) { 130 Token Tok = consume<Token>(Line); 131 switch (Tok) { 132 case Token::Module: 133 return Record::Module; 134 case Token::Info: 135 return Record::Info; 136 case Token::File: 137 return Record::File; 138 case Token::Func: 139 return Record::Func; 140 case Token::Public: 141 return Record::Public; 142 case Token::Stack: 143 Tok = consume<Token>(Line); 144 switch (Tok) { 145 case Token::CFI: 146 return Record::StackCFI; 147 case Token::Win: 148 return Record::StackWin; 149 default: 150 return llvm::None; 151 } 152 case Token::Inline: 153 return Record::Inline; 154 case Token::InlineOrigin: 155 return Record::InlineOrigin; 156 case Token::Unknown: 157 // Optimistically assume that any unrecognised token means this is a line 158 // record, those don't have a special keyword and start directly with a 159 // hex number. 160 return Record::Line; 161 162 case Token::CodeID: 163 case Token::CFI: 164 case Token::Init: 165 case Token::Win: 166 // These should never appear at the start of a valid record. 167 return llvm::None; 168 } 169 llvm_unreachable("Fully covered switch above!"); 170 } 171 172 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { 173 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out 174 if (consume<Token>(Line) != Token::Module) 175 return llvm::None; 176 177 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line); 178 if (OS == llvm::Triple::UnknownOS) 179 return llvm::None; 180 181 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line); 182 if (Arch == llvm::Triple::UnknownArch) 183 return llvm::None; 184 185 llvm::StringRef Str; 186 std::tie(Str, Line) = getToken(Line); 187 UUID ID = parseModuleId(OS, Str); 188 if (!ID) 189 return llvm::None; 190 191 return ModuleRecord(OS, Arch, std::move(ID)); 192 } 193 194 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 195 const ModuleRecord &R) { 196 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " 197 << llvm::Triple::getArchTypeName(R.Arch) << " " 198 << R.ID.GetAsString(); 199 } 200 201 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { 202 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] 203 if (consume<Token>(Line) != Token::Info) 204 return llvm::None; 205 206 if (consume<Token>(Line) != Token::CodeID) 207 return llvm::None; 208 209 llvm::StringRef Str; 210 std::tie(Str, Line) = getToken(Line); 211 // If we don't have any text following the code ID (e.g. on linux), we should 212 // use this as the UUID. Otherwise, we should revert back to the module ID. 213 UUID ID; 214 if (Line.trim().empty()) { 215 if (Str.empty() || !ID.SetFromStringRef(Str)) 216 return llvm::None; 217 } 218 return InfoRecord(std::move(ID)); 219 } 220 221 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 222 const InfoRecord &R) { 223 return OS << "INFO CODE_ID " << R.ID.GetAsString(); 224 } 225 226 template <typename T> 227 static llvm::Optional<T> parseNumberName(llvm::StringRef Line, 228 Token TokenType) { 229 // TOKEN number name 230 if (consume<Token>(Line) != TokenType) 231 return llvm::None; 232 233 llvm::StringRef Str; 234 size_t Number; 235 std::tie(Str, Line) = getToken(Line); 236 if (!to_integer(Str, Number)) 237 return llvm::None; 238 239 llvm::StringRef Name = Line.trim(); 240 if (Name.empty()) 241 return llvm::None; 242 243 return T(Number, Name); 244 } 245 246 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) { 247 // FILE number name 248 return parseNumberName<FileRecord>(Line, Token::File); 249 } 250 251 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 252 const FileRecord &R) { 253 return OS << "FILE " << R.Number << " " << R.Name; 254 } 255 256 llvm::Optional<InlineOriginRecord> 257 InlineOriginRecord::parse(llvm::StringRef Line) { 258 // INLINE_ORIGIN number name 259 return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin); 260 } 261 262 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 263 const InlineOriginRecord &R) { 264 return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name; 265 } 266 267 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, 268 lldb::addr_t &Address, lldb::addr_t *Size, 269 lldb::addr_t &ParamSize, llvm::StringRef &Name) { 270 // PUBLIC [m] address param_size name 271 // or 272 // FUNC [m] address size param_size name 273 274 Token Tok = Size ? Token::Func : Token::Public; 275 276 if (consume<Token>(Line) != Tok) 277 return false; 278 279 llvm::StringRef Str; 280 std::tie(Str, Line) = getToken(Line); 281 Multiple = Str == "m"; 282 283 if (Multiple) 284 std::tie(Str, Line) = getToken(Line); 285 if (!to_integer(Str, Address, 16)) 286 return false; 287 288 if (Tok == Token::Func) { 289 std::tie(Str, Line) = getToken(Line); 290 if (!to_integer(Str, *Size, 16)) 291 return false; 292 } 293 294 std::tie(Str, Line) = getToken(Line); 295 if (!to_integer(Str, ParamSize, 16)) 296 return false; 297 298 Name = Line.trim(); 299 if (Name.empty()) 300 return false; 301 302 return true; 303 } 304 305 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { 306 bool Multiple; 307 lldb::addr_t Address, Size, ParamSize; 308 llvm::StringRef Name; 309 310 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) 311 return FuncRecord(Multiple, Address, Size, ParamSize, Name); 312 313 return llvm::None; 314 } 315 316 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { 317 return L.Multiple == R.Multiple && L.Address == R.Address && 318 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; 319 } 320 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 321 const FuncRecord &R) { 322 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", 323 R.Multiple ? "m " : "", R.Address, R.Size, 324 R.ParamSize, R.Name); 325 } 326 327 llvm::Optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) { 328 // INLINE inline_nest_level call_site_line call_site_file_num origin_num 329 // [address size]+ 330 if (consume<Token>(Line) != Token::Inline) 331 return llvm::None; 332 333 llvm::SmallVector<llvm::StringRef> Tokens; 334 SplitString(Line, Tokens, " "); 335 if (Tokens.size() < 6 || Tokens.size() % 2 == 1) 336 return llvm::None; 337 338 size_t InlineNestLevel; 339 uint32_t CallSiteLineNum; 340 size_t CallSiteFileNum; 341 size_t OriginNum; 342 if (!(to_integer(Tokens[0], InlineNestLevel) && 343 to_integer(Tokens[1], CallSiteLineNum) && 344 to_integer(Tokens[2], CallSiteFileNum) && 345 to_integer(Tokens[3], OriginNum))) 346 return llvm::None; 347 348 InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum, 349 CallSiteFileNum, OriginNum); 350 for (size_t i = 4; i < Tokens.size(); i += 2) { 351 lldb::addr_t Address; 352 if (!to_integer(Tokens[i], Address, 16)) 353 return llvm::None; 354 lldb::addr_t Size; 355 if (!to_integer(Tokens[i + 1].trim(), Size, 16)) 356 return llvm::None; 357 Record.Ranges.emplace_back(Address, Size); 358 } 359 return Record; 360 } 361 362 bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) { 363 return L.InlineNestLevel == R.InlineNestLevel && 364 L.CallSiteLineNum == R.CallSiteLineNum && 365 L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum && 366 L.Ranges == R.Ranges; 367 } 368 369 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 370 const InlineRecord &R) { 371 OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel, 372 R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum); 373 for (const auto &range : R.Ranges) { 374 OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second); 375 } 376 return OS; 377 } 378 379 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) { 380 lldb::addr_t Address; 381 llvm::StringRef Str; 382 std::tie(Str, Line) = getToken(Line); 383 if (!to_integer(Str, Address, 16)) 384 return llvm::None; 385 386 lldb::addr_t Size; 387 std::tie(Str, Line) = getToken(Line); 388 if (!to_integer(Str, Size, 16)) 389 return llvm::None; 390 391 uint32_t LineNum; 392 std::tie(Str, Line) = getToken(Line); 393 if (!to_integer(Str, LineNum)) 394 return llvm::None; 395 396 size_t FileNum; 397 std::tie(Str, Line) = getToken(Line); 398 if (!to_integer(Str, FileNum)) 399 return llvm::None; 400 401 return LineRecord(Address, Size, LineNum, FileNum); 402 } 403 404 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) { 405 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum && 406 L.FileNum == R.FileNum; 407 } 408 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 409 const LineRecord &R) { 410 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size, 411 R.LineNum, R.FileNum); 412 } 413 414 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { 415 bool Multiple; 416 lldb::addr_t Address, ParamSize; 417 llvm::StringRef Name; 418 419 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) 420 return PublicRecord(Multiple, Address, ParamSize, Name); 421 422 return llvm::None; 423 } 424 425 bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { 426 return L.Multiple == R.Multiple && L.Address == R.Address && 427 L.ParamSize == R.ParamSize && L.Name == R.Name; 428 } 429 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 430 const PublicRecord &R) { 431 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", 432 R.Multiple ? "m " : "", R.Address, R.ParamSize, 433 R.Name); 434 } 435 436 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) { 437 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ... 438 // or 439 // STACK CFI address reg1: expr1 reg2: expr2 ... 440 // No token in exprN ends with a colon. 441 442 if (consume<Token>(Line) != Token::Stack) 443 return llvm::None; 444 if (consume<Token>(Line) != Token::CFI) 445 return llvm::None; 446 447 llvm::StringRef Str; 448 std::tie(Str, Line) = getToken(Line); 449 450 bool IsInitRecord = stringTo<Token>(Str) == Token::Init; 451 if (IsInitRecord) 452 std::tie(Str, Line) = getToken(Line); 453 454 lldb::addr_t Address; 455 if (!to_integer(Str, Address, 16)) 456 return llvm::None; 457 458 llvm::Optional<lldb::addr_t> Size; 459 if (IsInitRecord) { 460 Size.emplace(); 461 std::tie(Str, Line) = getToken(Line); 462 if (!to_integer(Str, *Size, 16)) 463 return llvm::None; 464 } 465 466 return StackCFIRecord(Address, Size, Line.trim()); 467 } 468 469 bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) { 470 return L.Address == R.Address && L.Size == R.Size && 471 L.UnwindRules == R.UnwindRules; 472 } 473 474 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 475 const StackCFIRecord &R) { 476 OS << "STACK CFI "; 477 if (R.Size) 478 OS << "INIT "; 479 OS << llvm::formatv("{0:x-} ", R.Address); 480 if (R.Size) 481 OS << llvm::formatv("{0:x-} ", *R.Size); 482 return OS << " " << R.UnwindRules; 483 } 484 485 llvm::Optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) { 486 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size 487 // saved_register_size local_size max_stack_size has_program_string 488 // program_string_OR_allocates_base_pointer 489 490 if (consume<Token>(Line) != Token::Stack) 491 return llvm::None; 492 if (consume<Token>(Line) != Token::Win) 493 return llvm::None; 494 495 llvm::StringRef Str; 496 uint8_t Type; 497 std::tie(Str, Line) = getToken(Line); 498 // Right now we only support the "FrameData" frame type. 499 if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData) 500 return llvm::None; 501 502 lldb::addr_t RVA; 503 std::tie(Str, Line) = getToken(Line); 504 if (!to_integer(Str, RVA, 16)) 505 return llvm::None; 506 507 lldb::addr_t CodeSize; 508 std::tie(Str, Line) = getToken(Line); 509 if (!to_integer(Str, CodeSize, 16)) 510 return llvm::None; 511 512 // Skip fields which we aren't using right now. 513 std::tie(Str, Line) = getToken(Line); // prologue_size 514 std::tie(Str, Line) = getToken(Line); // epilogue_size 515 516 lldb::addr_t ParameterSize; 517 std::tie(Str, Line) = getToken(Line); 518 if (!to_integer(Str, ParameterSize, 16)) 519 return llvm::None; 520 521 lldb::addr_t SavedRegisterSize; 522 std::tie(Str, Line) = getToken(Line); 523 if (!to_integer(Str, SavedRegisterSize, 16)) 524 return llvm::None; 525 526 lldb::addr_t LocalSize; 527 std::tie(Str, Line) = getToken(Line); 528 if (!to_integer(Str, LocalSize, 16)) 529 return llvm::None; 530 531 std::tie(Str, Line) = getToken(Line); // max_stack_size 532 533 uint8_t HasProgramString; 534 std::tie(Str, Line) = getToken(Line); 535 if (!to_integer(Str, HasProgramString)) 536 return llvm::None; 537 // FrameData records should always have a program string. 538 if (!HasProgramString) 539 return llvm::None; 540 541 return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize, 542 LocalSize, Line.trim()); 543 } 544 545 bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) { 546 return L.RVA == R.RVA && L.CodeSize == R.CodeSize && 547 L.ParameterSize == R.ParameterSize && 548 L.SavedRegisterSize == R.SavedRegisterSize && 549 L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString; 550 } 551 552 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 553 const StackWinRecord &R) { 554 return OS << llvm::formatv( 555 "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA, 556 R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize, 557 R.ProgramString); 558 } 559 560 llvm::StringRef breakpad::toString(Record::Kind K) { 561 switch (K) { 562 case Record::Module: 563 return "MODULE"; 564 case Record::Info: 565 return "INFO"; 566 case Record::File: 567 return "FILE"; 568 case Record::Func: 569 return "FUNC"; 570 case Record::Inline: 571 return "INLINE"; 572 case Record::InlineOrigin: 573 return "INLINE_ORIGIN"; 574 case Record::Line: 575 return "LINE"; 576 case Record::Public: 577 return "PUBLIC"; 578 case Record::StackCFI: 579 return "STACK CFI"; 580 case Record::StackWin: 581 return "STACK WIN"; 582 } 583 llvm_unreachable("Unknown record kind!"); 584 } 585