1 //===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" 9 10 #include "llvm/ADT/ArrayRef.h" 11 #include "llvm/Support/Endian.h" 12 13 using namespace llvm; 14 using namespace llvm::codeview; 15 16 static inline MethodKind getMethodKind(uint16_t Attrs) { 17 Attrs &= uint16_t(MethodOptions::MethodKindMask); 18 Attrs >>= 2; 19 return MethodKind(Attrs); 20 } 21 22 static inline bool isIntroVirtual(uint16_t Attrs) { 23 MethodKind MK = getMethodKind(Attrs); 24 return MK == MethodKind::IntroducingVirtual || 25 MK == MethodKind::PureIntroducingVirtual; 26 } 27 28 static inline PointerMode getPointerMode(uint32_t Attrs) { 29 return static_cast<PointerMode>((Attrs >> PointerRecord::PointerModeShift) & 30 PointerRecord::PointerModeMask); 31 } 32 33 static inline bool isMemberPointer(uint32_t Attrs) { 34 PointerMode Mode = getPointerMode(Attrs); 35 return Mode == PointerMode::PointerToDataMember || 36 Mode == PointerMode::PointerToMemberFunction; 37 } 38 39 static inline uint32_t getEncodedIntegerLength(ArrayRef<uint8_t> Data) { 40 uint16_t N = support::endian::read16le(Data.data()); 41 if (N < LF_NUMERIC) 42 return 2; 43 44 assert(N <= LF_UQUADWORD); 45 46 constexpr uint32_t Sizes[] = { 47 1, // LF_CHAR 48 2, // LF_SHORT 49 2, // LF_USHORT 50 4, // LF_LONG 51 4, // LF_ULONG 52 4, // LF_REAL32 53 8, // LF_REAL64 54 10, // LF_REAL80 55 16, // LF_REAL128 56 8, // LF_QUADWORD 57 8, // LF_UQUADWORD 58 }; 59 60 return 2 + Sizes[N - LF_NUMERIC]; 61 } 62 63 static inline uint32_t getCStringLength(ArrayRef<uint8_t> Data) { 64 const char *S = reinterpret_cast<const char *>(Data.data()); 65 return strlen(S) + 1; 66 } 67 68 static void handleMethodOverloadList(ArrayRef<uint8_t> Content, 69 SmallVectorImpl<TiReference> &Refs) { 70 uint32_t Offset = 0; 71 72 while (!Content.empty()) { 73 // Array of: 74 // 0: Attrs 75 // 2: Padding 76 // 4: TypeIndex 77 // if (isIntroVirtual()) 78 // 8: VFTableOffset 79 80 // At least 8 bytes are guaranteed. 4 extra bytes come iff function is an 81 // intro virtual. 82 uint32_t Len = 8; 83 84 uint16_t Attrs = support::endian::read16le(Content.data()); 85 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 86 87 if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) 88 Len += 4; 89 Offset += Len; 90 Content = Content.drop_front(Len); 91 } 92 } 93 94 static uint32_t handleBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset, 95 SmallVectorImpl<TiReference> &Refs) { 96 // 0: Kind 97 // 2: Padding 98 // 4: TypeIndex 99 // 8: Encoded Integer 100 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 101 return 8 + getEncodedIntegerLength(Data.drop_front(8)); 102 } 103 104 static uint32_t handleEnumerator(ArrayRef<uint8_t> Data, uint32_t Offset, 105 SmallVectorImpl<TiReference> &Refs) { 106 // 0: Kind 107 // 2: Padding 108 // 4: Encoded Integer 109 // <next>: Name 110 uint32_t Size = 4 + getEncodedIntegerLength(Data.drop_front(4)); 111 return Size + getCStringLength(Data.drop_front(Size)); 112 } 113 114 static uint32_t handleDataMember(ArrayRef<uint8_t> Data, uint32_t Offset, 115 SmallVectorImpl<TiReference> &Refs) { 116 // 0: Kind 117 // 2: Padding 118 // 4: TypeIndex 119 // 8: Encoded Integer 120 // <next>: Name 121 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 122 uint32_t Size = 8 + getEncodedIntegerLength(Data.drop_front(8)); 123 return Size + getCStringLength(Data.drop_front(Size)); 124 } 125 126 static uint32_t handleOverloadedMethod(ArrayRef<uint8_t> Data, uint32_t Offset, 127 SmallVectorImpl<TiReference> &Refs) { 128 // 0: Kind 129 // 2: Padding 130 // 4: TypeIndex 131 // 8: Name 132 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 133 return 8 + getCStringLength(Data.drop_front(8)); 134 } 135 136 static uint32_t handleOneMethod(ArrayRef<uint8_t> Data, uint32_t Offset, 137 SmallVectorImpl<TiReference> &Refs) { 138 // 0: Kind 139 // 2: Attributes 140 // 4: Type 141 // if (isIntroVirtual) 142 // 8: VFTableOffset 143 // <next>: Name 144 uint32_t Size = 8; 145 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 146 147 uint16_t Attrs = support::endian::read16le(Data.drop_front(2).data()); 148 if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) 149 Size += 4; 150 151 return Size + getCStringLength(Data.drop_front(Size)); 152 } 153 154 static uint32_t handleNestedType(ArrayRef<uint8_t> Data, uint32_t Offset, 155 SmallVectorImpl<TiReference> &Refs) { 156 // 0: Kind 157 // 2: Padding 158 // 4: TypeIndex 159 // 8: Name 160 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 161 return 8 + getCStringLength(Data.drop_front(8)); 162 } 163 164 static uint32_t handleStaticDataMember(ArrayRef<uint8_t> Data, uint32_t Offset, 165 SmallVectorImpl<TiReference> &Refs) { 166 // 0: Kind 167 // 2: Padding 168 // 4: TypeIndex 169 // 8: Name 170 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 171 return 8 + getCStringLength(Data.drop_front(8)); 172 } 173 174 static uint32_t handleVirtualBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset, 175 bool IsIndirect, 176 SmallVectorImpl<TiReference> &Refs) { 177 // 0: Kind 178 // 2: Attrs 179 // 4: TypeIndex 180 // 8: TypeIndex 181 // 12: Encoded Integer 182 // <next>: Encoded Integer 183 uint32_t Size = 12; 184 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 2}); 185 Size += getEncodedIntegerLength(Data.drop_front(Size)); 186 Size += getEncodedIntegerLength(Data.drop_front(Size)); 187 return Size; 188 } 189 190 static uint32_t handleVFPtr(ArrayRef<uint8_t> Data, uint32_t Offset, 191 SmallVectorImpl<TiReference> &Refs) { 192 // 0: Kind 193 // 2: Padding 194 // 4: TypeIndex 195 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 196 return 8; 197 } 198 199 static uint32_t handleListContinuation(ArrayRef<uint8_t> Data, uint32_t Offset, 200 SmallVectorImpl<TiReference> &Refs) { 201 // 0: Kind 202 // 2: Padding 203 // 4: TypeIndex 204 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 205 return 8; 206 } 207 208 static void handleFieldList(ArrayRef<uint8_t> Content, 209 SmallVectorImpl<TiReference> &Refs) { 210 uint32_t Offset = 0; 211 uint32_t ThisLen = 0; 212 while (!Content.empty()) { 213 TypeLeafKind Kind = 214 static_cast<TypeLeafKind>(support::endian::read16le(Content.data())); 215 switch (Kind) { 216 case LF_BCLASS: 217 ThisLen = handleBaseClass(Content, Offset, Refs); 218 break; 219 case LF_ENUMERATE: 220 ThisLen = handleEnumerator(Content, Offset, Refs); 221 break; 222 case LF_MEMBER: 223 ThisLen = handleDataMember(Content, Offset, Refs); 224 break; 225 case LF_METHOD: 226 ThisLen = handleOverloadedMethod(Content, Offset, Refs); 227 break; 228 case LF_ONEMETHOD: 229 ThisLen = handleOneMethod(Content, Offset, Refs); 230 break; 231 case LF_NESTTYPE: 232 ThisLen = handleNestedType(Content, Offset, Refs); 233 break; 234 case LF_STMEMBER: 235 ThisLen = handleStaticDataMember(Content, Offset, Refs); 236 break; 237 case LF_VBCLASS: 238 case LF_IVBCLASS: 239 ThisLen = 240 handleVirtualBaseClass(Content, Offset, Kind == LF_VBCLASS, Refs); 241 break; 242 case LF_VFUNCTAB: 243 ThisLen = handleVFPtr(Content, Offset, Refs); 244 break; 245 case LF_INDEX: 246 ThisLen = handleListContinuation(Content, Offset, Refs); 247 break; 248 default: 249 return; 250 } 251 Content = Content.drop_front(ThisLen); 252 Offset += ThisLen; 253 if (!Content.empty()) { 254 uint8_t Pad = Content.front(); 255 if (Pad >= LF_PAD0) { 256 uint32_t Skip = Pad & 0x0F; 257 Content = Content.drop_front(Skip); 258 Offset += Skip; 259 } 260 } 261 } 262 } 263 264 static void handlePointer(ArrayRef<uint8_t> Content, 265 SmallVectorImpl<TiReference> &Refs) { 266 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 267 268 uint32_t Attrs = support::endian::read32le(Content.drop_front(4).data()); 269 if (isMemberPointer(Attrs)) 270 Refs.push_back({TiRefKind::TypeRef, 8, 1}); 271 } 272 273 static void discoverTypeIndices(ArrayRef<uint8_t> Content, TypeLeafKind Kind, 274 SmallVectorImpl<TiReference> &Refs) { 275 uint32_t Count; 276 // FIXME: In the future it would be nice if we could avoid hardcoding these 277 // values. One idea is to define some structures representing these types 278 // that would allow the use of offsetof(). 279 switch (Kind) { 280 case TypeLeafKind::LF_FUNC_ID: 281 Refs.push_back({TiRefKind::IndexRef, 0, 1}); 282 Refs.push_back({TiRefKind::TypeRef, 4, 1}); 283 break; 284 case TypeLeafKind::LF_MFUNC_ID: 285 Refs.push_back({TiRefKind::TypeRef, 0, 2}); 286 break; 287 case TypeLeafKind::LF_STRING_ID: 288 Refs.push_back({TiRefKind::IndexRef, 0, 1}); 289 break; 290 case TypeLeafKind::LF_SUBSTR_LIST: 291 Count = support::endian::read32le(Content.data()); 292 if (Count > 0) 293 Refs.push_back({TiRefKind::IndexRef, 4, Count}); 294 break; 295 case TypeLeafKind::LF_BUILDINFO: 296 Count = support::endian::read16le(Content.data()); 297 if (Count > 0) 298 Refs.push_back({TiRefKind::IndexRef, 2, Count}); 299 break; 300 case TypeLeafKind::LF_UDT_SRC_LINE: 301 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 302 Refs.push_back({TiRefKind::IndexRef, 4, 1}); 303 break; 304 case TypeLeafKind::LF_UDT_MOD_SRC_LINE: 305 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 306 break; 307 case TypeLeafKind::LF_MODIFIER: 308 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 309 break; 310 case TypeLeafKind::LF_PROCEDURE: 311 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 312 Refs.push_back({TiRefKind::TypeRef, 8, 1}); 313 break; 314 case TypeLeafKind::LF_MFUNCTION: 315 Refs.push_back({TiRefKind::TypeRef, 0, 3}); 316 Refs.push_back({TiRefKind::TypeRef, 16, 1}); 317 break; 318 case TypeLeafKind::LF_ARGLIST: 319 Count = support::endian::read32le(Content.data()); 320 if (Count > 0) 321 Refs.push_back({TiRefKind::TypeRef, 4, Count}); 322 break; 323 case TypeLeafKind::LF_ARRAY: 324 Refs.push_back({TiRefKind::TypeRef, 0, 2}); 325 break; 326 case TypeLeafKind::LF_CLASS: 327 case TypeLeafKind::LF_STRUCTURE: 328 case TypeLeafKind::LF_INTERFACE: 329 Refs.push_back({TiRefKind::TypeRef, 4, 3}); 330 break; 331 case TypeLeafKind::LF_UNION: 332 Refs.push_back({TiRefKind::TypeRef, 4, 1}); 333 break; 334 case TypeLeafKind::LF_ENUM: 335 Refs.push_back({TiRefKind::TypeRef, 4, 2}); 336 break; 337 case TypeLeafKind::LF_BITFIELD: 338 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 339 break; 340 case TypeLeafKind::LF_VFTABLE: 341 Refs.push_back({TiRefKind::TypeRef, 0, 2}); 342 break; 343 case TypeLeafKind::LF_VTSHAPE: 344 break; 345 case TypeLeafKind::LF_METHODLIST: 346 handleMethodOverloadList(Content, Refs); 347 break; 348 case TypeLeafKind::LF_FIELDLIST: 349 handleFieldList(Content, Refs); 350 break; 351 case TypeLeafKind::LF_POINTER: 352 handlePointer(Content, Refs); 353 break; 354 default: 355 break; 356 } 357 } 358 359 static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, 360 SmallVectorImpl<TiReference> &Refs) { 361 uint32_t Count; 362 // FIXME: In the future it would be nice if we could avoid hardcoding these 363 // values. One idea is to define some structures representing these types 364 // that would allow the use of offsetof(). 365 switch (Kind) { 366 case SymbolKind::S_GPROC32_ID: 367 case SymbolKind::S_LPROC32_ID: 368 case SymbolKind::S_LPROC32_DPC: 369 case SymbolKind::S_LPROC32_DPC_ID: 370 Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID 371 break; 372 case SymbolKind::S_GPROC32: 373 case SymbolKind::S_LPROC32: 374 Refs.push_back({TiRefKind::TypeRef, 24, 1}); // Type 375 break; 376 case SymbolKind::S_UDT: 377 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT 378 break; 379 case SymbolKind::S_GDATA32: 380 case SymbolKind::S_LDATA32: 381 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 382 break; 383 case SymbolKind::S_BUILDINFO: 384 Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags 385 break; 386 case SymbolKind::S_LTHREAD32: 387 case SymbolKind::S_GTHREAD32: 388 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 389 break; 390 case SymbolKind::S_FILESTATIC: 391 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 392 break; 393 case SymbolKind::S_LOCAL: 394 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 395 break; 396 case SymbolKind::S_REGISTER: 397 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 398 break; 399 case SymbolKind::S_CONSTANT: 400 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 401 break; 402 case SymbolKind::S_BPREL32: 403 case SymbolKind::S_REGREL32: 404 Refs.push_back({TiRefKind::TypeRef, 4, 1}); // Type 405 break; 406 case SymbolKind::S_CALLSITEINFO: 407 Refs.push_back({TiRefKind::TypeRef, 8, 1}); // Call signature 408 break; 409 case SymbolKind::S_CALLERS: 410 case SymbolKind::S_CALLEES: 411 case SymbolKind::S_INLINEES: 412 // The record is a count followed by an array of type indices. 413 Count = *reinterpret_cast<const ulittle32_t *>(Content.data()); 414 Refs.push_back({TiRefKind::IndexRef, 4, Count}); // Callees 415 break; 416 case SymbolKind::S_INLINESITE: 417 Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee 418 break; 419 case SymbolKind::S_HEAPALLOCSITE: 420 Refs.push_back({TiRefKind::TypeRef, 8, 1}); // UDT allocated 421 break; 422 423 // Defranges don't have types, just registers and code offsets. 424 case SymbolKind::S_DEFRANGE_REGISTER: 425 case SymbolKind::S_DEFRANGE_REGISTER_REL: 426 case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL: 427 case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE: 428 case SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER: 429 case SymbolKind::S_DEFRANGE_SUBFIELD: 430 break; 431 432 // No type references. 433 case SymbolKind::S_LABEL32: 434 case SymbolKind::S_OBJNAME: 435 case SymbolKind::S_COMPILE: 436 case SymbolKind::S_COMPILE2: 437 case SymbolKind::S_COMPILE3: 438 case SymbolKind::S_ENVBLOCK: 439 case SymbolKind::S_BLOCK32: 440 case SymbolKind::S_FRAMEPROC: 441 case SymbolKind::S_THUNK32: 442 case SymbolKind::S_FRAMECOOKIE: 443 case SymbolKind::S_UNAMESPACE: 444 break; 445 // Scope ending symbols. 446 case SymbolKind::S_END: 447 case SymbolKind::S_INLINESITE_END: 448 case SymbolKind::S_PROC_ID_END: 449 break; 450 default: 451 return false; // Unknown symbol. 452 } 453 return true; 454 } 455 456 void llvm::codeview::discoverTypeIndices(const CVType &Type, 457 SmallVectorImpl<TiReference> &Refs) { 458 ::discoverTypeIndices(Type.content(), Type.kind(), Refs); 459 } 460 461 static void resolveTypeIndexReferences(ArrayRef<uint8_t> RecordData, 462 ArrayRef<TiReference> Refs, 463 SmallVectorImpl<TypeIndex> &Indices) { 464 Indices.clear(); 465 466 if (Refs.empty()) 467 return; 468 469 RecordData = RecordData.drop_front(sizeof(RecordPrefix)); 470 471 BinaryStreamReader Reader(RecordData, support::little); 472 for (const auto &Ref : Refs) { 473 Reader.setOffset(Ref.Offset); 474 FixedStreamArray<TypeIndex> Run; 475 cantFail(Reader.readArray(Run, Ref.Count)); 476 Indices.append(Run.begin(), Run.end()); 477 } 478 } 479 480 void llvm::codeview::discoverTypeIndices(const CVType &Type, 481 SmallVectorImpl<TypeIndex> &Indices) { 482 return discoverTypeIndices(Type.RecordData, Indices); 483 } 484 485 void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData, 486 SmallVectorImpl<TypeIndex> &Indices) { 487 SmallVector<TiReference, 4> Refs; 488 discoverTypeIndices(RecordData, Refs); 489 resolveTypeIndexReferences(RecordData, Refs, Indices); 490 } 491 492 void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData, 493 SmallVectorImpl<TiReference> &Refs) { 494 const RecordPrefix *P = 495 reinterpret_cast<const RecordPrefix *>(RecordData.data()); 496 TypeLeafKind K = static_cast<TypeLeafKind>(uint16_t(P->RecordKind)); 497 ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs); 498 } 499 500 bool llvm::codeview::discoverTypeIndicesInSymbol( 501 const CVSymbol &Sym, SmallVectorImpl<TiReference> &Refs) { 502 SymbolKind K = Sym.kind(); 503 return ::discoverTypeIndices(Sym.content(), K, Refs); 504 } 505 506 bool llvm::codeview::discoverTypeIndicesInSymbol( 507 ArrayRef<uint8_t> RecordData, SmallVectorImpl<TiReference> &Refs) { 508 const RecordPrefix *P = 509 reinterpret_cast<const RecordPrefix *>(RecordData.data()); 510 SymbolKind K = static_cast<SymbolKind>(uint16_t(P->RecordKind)); 511 return ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, 512 Refs); 513 } 514 515 bool llvm::codeview::discoverTypeIndicesInSymbol( 516 ArrayRef<uint8_t> RecordData, SmallVectorImpl<TypeIndex> &Indices) { 517 SmallVector<TiReference, 2> Refs; 518 if (!discoverTypeIndicesInSymbol(RecordData, Refs)) 519 return false; 520 resolveTypeIndexReferences(RecordData, Refs, Indices); 521 return true; 522 } 523