1 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" 10 #include "llvm/ADT/SmallString.h" 11 #include "llvm/ADT/StringExtras.h" 12 #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" 13 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" 14 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" 15 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 16 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" 17 #include "llvm/DebugInfo/CodeView/TypeRecord.h" 18 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" 19 #include "llvm/Support/Error.h" 20 21 using namespace llvm; 22 using namespace llvm::codeview; 23 24 static inline size_t slotForIndex(TypeIndex Idx) { 25 assert(!Idx.isSimple() && "simple type indices have no slots"); 26 return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex; 27 } 28 29 namespace { 30 31 /// Implementation of CodeView type stream merging. 32 /// 33 /// A CodeView type stream is a series of records that reference each other 34 /// through type indices. A type index is either "simple", meaning it is less 35 /// than 0x1000 and refers to a builtin type, or it is complex, meaning it 36 /// refers to a prior type record in the current stream. The type index of a 37 /// record is equal to the number of records before it in the stream plus 38 /// 0x1000. 39 /// 40 /// Type records are only allowed to use type indices smaller than their own, so 41 /// a type stream is effectively a topologically sorted DAG. Cycles occuring in 42 /// the type graph of the source program are resolved with forward declarations 43 /// of composite types. This class implements the following type stream merging 44 /// algorithm, which relies on this DAG structure: 45 /// 46 /// - Begin with a new empty stream, and a new empty hash table that maps from 47 /// type record contents to new type index. 48 /// - For each new type stream, maintain a map from source type index to 49 /// destination type index. 50 /// - For each record, copy it and rewrite its type indices to be valid in the 51 /// destination type stream. 52 /// - If the new type record is not already present in the destination stream 53 /// hash table, append it to the destination type stream, assign it the next 54 /// type index, and update the two hash tables. 55 /// - If the type record already exists in the destination stream, discard it 56 /// and update the type index map to forward the source type index to the 57 /// existing destination type index. 58 /// 59 /// As an additional complication, type stream merging actually produces two 60 /// streams: an item (or IPI) stream and a type stream, as this is what is 61 /// actually stored in the final PDB. We choose which records go where by 62 /// looking at the record kind. 63 class TypeStreamMerger { 64 public: 65 explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest) 66 : IndexMap(SourceToDest) { 67 // When dealing with precompiled headers objects, all data in SourceToDest 68 // belongs to the precompiled headers object, and is assumed to be already 69 // remapped to the target PDB. Any forthcoming type that will be merged in 70 // might potentially back-reference this data. We also don't want to resolve 71 // twice the types in the precompiled object. 72 CurIndex += SourceToDest.size(); 73 } 74 75 static const TypeIndex Untranslated; 76 77 // Local hashing entry points 78 Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds, 79 MergingTypeTableBuilder &DestTypes, 80 const CVTypeArray &IdsAndTypes, Optional<uint32_t> &S); 81 Error mergeIdRecords(MergingTypeTableBuilder &Dest, 82 ArrayRef<TypeIndex> TypeSourceToDest, 83 const CVTypeArray &Ids); 84 Error mergeTypeRecords(MergingTypeTableBuilder &Dest, 85 const CVTypeArray &Types); 86 87 // Global hashing entry points 88 Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, 89 GlobalTypeTableBuilder &DestTypes, 90 const CVTypeArray &IdsAndTypes, 91 ArrayRef<GloballyHashedType> Hashes, 92 Optional<uint32_t> &S); 93 Error mergeIdRecords(GlobalTypeTableBuilder &Dest, 94 ArrayRef<TypeIndex> TypeSourceToDest, 95 const CVTypeArray &Ids, 96 ArrayRef<GloballyHashedType> Hashes); 97 Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, 98 ArrayRef<GloballyHashedType> Hashes, 99 Optional<uint32_t> &S); 100 101 private: 102 Error doit(const CVTypeArray &Types); 103 104 Error remapAllTypes(const CVTypeArray &Types); 105 106 Error remapType(const CVType &Type); 107 108 void addMapping(TypeIndex Idx); 109 110 inline bool remapTypeIndex(TypeIndex &Idx) { 111 // If we're mapping a pure index stream, then IndexMap only contains 112 // mappings from OldIdStream -> NewIdStream, in which case we will need to 113 // use the special mapping from OldTypeStream -> NewTypeStream which was 114 // computed externally. Regardless, we use this special map if and only if 115 // we are doing an id-only mapping. 116 if (!hasTypeStream()) 117 return remapIndex(Idx, TypeLookup); 118 119 assert(TypeLookup.empty()); 120 return remapIndex(Idx, IndexMap); 121 } 122 inline bool remapItemIndex(TypeIndex &Idx) { 123 assert(hasIdStream()); 124 return remapIndex(Idx, IndexMap); 125 } 126 127 bool hasTypeStream() const { 128 return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream); 129 } 130 131 bool hasIdStream() const { 132 return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream); 133 } 134 135 ArrayRef<uint8_t> remapIndices(const CVType &OriginalType, 136 MutableArrayRef<uint8_t> Storage); 137 138 inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) { 139 if (LLVM_LIKELY(remapIndexSimple(Idx, Map))) 140 return true; 141 142 return remapIndexFallback(Idx, Map); 143 } 144 145 inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const { 146 // Simple types are unchanged. 147 if (Idx.isSimple()) 148 return true; 149 150 // Check if this type index refers to a record we've already translated 151 // successfully. If it refers to a type later in the stream or a record we 152 // had to defer, defer it until later pass. 153 unsigned MapPos = slotForIndex(Idx); 154 if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated)) 155 return false; 156 157 Idx = Map[MapPos]; 158 return true; 159 } 160 161 bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map); 162 163 Error errorCorruptRecord() const { 164 return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record); 165 } 166 167 Expected<bool> shouldRemapType(const CVType &Type); 168 169 Optional<Error> LastError; 170 171 bool UseGlobalHashes = false; 172 173 bool IsSecondPass = false; 174 175 unsigned NumBadIndices = 0; 176 177 TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; 178 179 MergingTypeTableBuilder *DestIdStream = nullptr; 180 MergingTypeTableBuilder *DestTypeStream = nullptr; 181 182 GlobalTypeTableBuilder *DestGlobalIdStream = nullptr; 183 GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr; 184 185 ArrayRef<GloballyHashedType> GlobalHashes; 186 187 // If we're only mapping id records, this array contains the mapping for 188 // type records. 189 ArrayRef<TypeIndex> TypeLookup; 190 191 /// Map from source type index to destination type index. Indexed by source 192 /// type index minus 0x1000. 193 SmallVectorImpl<TypeIndex> &IndexMap; 194 195 /// Temporary storage that we use to copy a record's data while re-writing 196 /// its type indices. 197 SmallVector<uint8_t, 256> RemapStorage; 198 199 Optional<uint32_t> PCHSignature; 200 }; 201 202 } // end anonymous namespace 203 204 const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); 205 206 void TypeStreamMerger::addMapping(TypeIndex Idx) { 207 if (!IsSecondPass) { 208 assert(IndexMap.size() == slotForIndex(CurIndex) && 209 "visitKnownRecord should add one index map entry"); 210 IndexMap.push_back(Idx); 211 } else { 212 assert(slotForIndex(CurIndex) < IndexMap.size()); 213 IndexMap[slotForIndex(CurIndex)] = Idx; 214 } 215 } 216 217 bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx, 218 ArrayRef<TypeIndex> Map) { 219 size_t MapPos = slotForIndex(Idx); 220 221 // If this is the second pass and this index isn't in the map, then it points 222 // outside the current type stream, and this is a corrupt record. 223 if (IsSecondPass && MapPos >= Map.size()) { 224 // FIXME: Print a more useful error. We can give the current record and the 225 // index that we think its pointing to. 226 if (LastError) 227 LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); 228 else 229 LastError = errorCorruptRecord(); 230 } 231 232 ++NumBadIndices; 233 234 // This type index is invalid. Remap this to "not translated by cvpack", 235 // and return failure. 236 Idx = Untranslated; 237 return false; 238 } 239 240 // Local hashing entry points 241 Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest, 242 const CVTypeArray &Types) { 243 DestTypeStream = &Dest; 244 UseGlobalHashes = false; 245 246 return doit(Types); 247 } 248 249 Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest, 250 ArrayRef<TypeIndex> TypeSourceToDest, 251 const CVTypeArray &Ids) { 252 DestIdStream = &Dest; 253 TypeLookup = TypeSourceToDest; 254 UseGlobalHashes = false; 255 256 return doit(Ids); 257 } 258 259 Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds, 260 MergingTypeTableBuilder &DestTypes, 261 const CVTypeArray &IdsAndTypes, 262 Optional<uint32_t> &S) { 263 DestIdStream = &DestIds; 264 DestTypeStream = &DestTypes; 265 UseGlobalHashes = false; 266 auto Err = doit(IdsAndTypes); 267 S = PCHSignature; 268 return Err; 269 } 270 271 // Global hashing entry points 272 Error TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder &Dest, 273 const CVTypeArray &Types, 274 ArrayRef<GloballyHashedType> Hashes, 275 Optional<uint32_t> &S) { 276 DestGlobalTypeStream = &Dest; 277 UseGlobalHashes = true; 278 GlobalHashes = Hashes; 279 auto Err = doit(Types); 280 S = PCHSignature; 281 return Err; 282 } 283 284 Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest, 285 ArrayRef<TypeIndex> TypeSourceToDest, 286 const CVTypeArray &Ids, 287 ArrayRef<GloballyHashedType> Hashes) { 288 DestGlobalIdStream = &Dest; 289 TypeLookup = TypeSourceToDest; 290 UseGlobalHashes = true; 291 GlobalHashes = Hashes; 292 293 return doit(Ids); 294 } 295 296 Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, 297 GlobalTypeTableBuilder &DestTypes, 298 const CVTypeArray &IdsAndTypes, 299 ArrayRef<GloballyHashedType> Hashes, 300 Optional<uint32_t> &S) { 301 DestGlobalIdStream = &DestIds; 302 DestGlobalTypeStream = &DestTypes; 303 UseGlobalHashes = true; 304 GlobalHashes = Hashes; 305 auto Err = doit(IdsAndTypes); 306 S = PCHSignature; 307 return Err; 308 } 309 310 Error TypeStreamMerger::doit(const CVTypeArray &Types) { 311 if (auto EC = remapAllTypes(Types)) 312 return EC; 313 314 // If we found bad indices but no other errors, try doing another pass and see 315 // if we can resolve the indices that weren't in the map on the first pass. 316 // This may require multiple passes, but we should always make progress. MASM 317 // is the only known CodeView producer that makes type streams that aren't 318 // topologically sorted. The standard library contains MASM-produced objects, 319 // so this is important to handle correctly, but we don't have to be too 320 // efficient. MASM type streams are usually very small. 321 while (!LastError && NumBadIndices > 0) { 322 unsigned BadIndicesRemaining = NumBadIndices; 323 IsSecondPass = true; 324 NumBadIndices = 0; 325 CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); 326 327 if (auto EC = remapAllTypes(Types)) 328 return EC; 329 330 assert(NumBadIndices <= BadIndicesRemaining && 331 "second pass found more bad indices"); 332 if (!LastError && NumBadIndices == BadIndicesRemaining) { 333 return llvm::make_error<CodeViewError>( 334 cv_error_code::corrupt_record, "Input type graph contains cycles"); 335 } 336 } 337 338 if (LastError) 339 return std::move(*LastError); 340 return Error::success(); 341 } 342 343 Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { 344 BinaryStreamRef Stream = Types.getUnderlyingStream(); 345 ArrayRef<uint8_t> Buffer; 346 cantFail(Stream.readBytes(0, Stream.getLength(), Buffer)); 347 348 return forEachCodeViewRecord<CVType>( 349 Buffer, [this](const CVType &T) { return remapType(T); }); 350 } 351 352 Error TypeStreamMerger::remapType(const CVType &Type) { 353 auto R = shouldRemapType(Type); 354 if (!R) 355 return R.takeError(); 356 357 TypeIndex DestIdx = Untranslated; 358 if (*R) { 359 auto DoSerialize = 360 [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> { 361 return remapIndices(Type, Storage); 362 }; 363 unsigned AlignedSize = alignTo(Type.RecordData.size(), 4); 364 365 if (LLVM_LIKELY(UseGlobalHashes)) { 366 GlobalTypeTableBuilder &Dest = 367 isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; 368 GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; 369 DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize); 370 } else { 371 MergingTypeTableBuilder &Dest = 372 isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; 373 374 RemapStorage.resize(AlignedSize); 375 ArrayRef<uint8_t> Result = DoSerialize(RemapStorage); 376 if (!Result.empty()) 377 DestIdx = Dest.insertRecordBytes(Result); 378 } 379 } 380 addMapping(DestIdx); 381 382 ++CurIndex; 383 assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) && 384 "visitKnownRecord should add one index map entry"); 385 return Error::success(); 386 } 387 388 ArrayRef<uint8_t> 389 TypeStreamMerger::remapIndices(const CVType &OriginalType, 390 MutableArrayRef<uint8_t> Storage) { 391 unsigned Align = OriginalType.RecordData.size() & 3; 392 assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) && 393 "The storage buffer size is not a multiple of 4 bytes which will " 394 "cause misalignment in the output TPI stream!"); 395 396 SmallVector<TiReference, 4> Refs; 397 discoverTypeIndices(OriginalType.RecordData, Refs); 398 if (Refs.empty() && Align == 0) 399 return OriginalType.RecordData; 400 401 ::memcpy(Storage.data(), OriginalType.RecordData.data(), 402 OriginalType.RecordData.size()); 403 404 uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix); 405 406 for (auto &Ref : Refs) { 407 TypeIndex *DestTIs = 408 reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset); 409 410 for (size_t I = 0; I < Ref.Count; ++I) { 411 TypeIndex &TI = DestTIs[I]; 412 bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI) 413 : remapTypeIndex(TI); 414 if (LLVM_UNLIKELY(!Success)) 415 return {}; 416 } 417 } 418 419 if (Align > 0) { 420 RecordPrefix *StorageHeader = 421 reinterpret_cast<RecordPrefix *>(Storage.data()); 422 StorageHeader->RecordLen += 4 - Align; 423 424 DestContent = Storage.data() + OriginalType.RecordData.size(); 425 for (; Align < 4; ++Align) 426 *DestContent++ = LF_PAD4 - Align; 427 } 428 return Storage; 429 } 430 431 Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest, 432 SmallVectorImpl<TypeIndex> &SourceToDest, 433 const CVTypeArray &Types) { 434 TypeStreamMerger M(SourceToDest); 435 return M.mergeTypeRecords(Dest, Types); 436 } 437 438 Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest, 439 ArrayRef<TypeIndex> TypeSourceToDest, 440 SmallVectorImpl<TypeIndex> &SourceToDest, 441 const CVTypeArray &Ids) { 442 TypeStreamMerger M(SourceToDest); 443 return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); 444 } 445 446 Error llvm::codeview::mergeTypeAndIdRecords( 447 MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, 448 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes, 449 Optional<uint32_t> &PCHSignature) { 450 TypeStreamMerger M(SourceToDest); 451 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHSignature); 452 } 453 454 Error llvm::codeview::mergeTypeAndIdRecords( 455 GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, 456 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes, 457 ArrayRef<GloballyHashedType> Hashes, Optional<uint32_t> &PCHSignature) { 458 TypeStreamMerger M(SourceToDest); 459 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes, 460 PCHSignature); 461 } 462 463 Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest, 464 SmallVectorImpl<TypeIndex> &SourceToDest, 465 const CVTypeArray &Types, 466 ArrayRef<GloballyHashedType> Hashes, 467 Optional<uint32_t> &PCHSignature) { 468 TypeStreamMerger M(SourceToDest); 469 return M.mergeTypeRecords(Dest, Types, Hashes, PCHSignature); 470 } 471 472 Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest, 473 ArrayRef<TypeIndex> Types, 474 SmallVectorImpl<TypeIndex> &SourceToDest, 475 const CVTypeArray &Ids, 476 ArrayRef<GloballyHashedType> Hashes) { 477 TypeStreamMerger M(SourceToDest); 478 return M.mergeIdRecords(Dest, Types, Ids, Hashes); 479 } 480 481 Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) { 482 // For object files containing precompiled types, we need to extract the 483 // signature, through EndPrecompRecord. This is done here for performance 484 // reasons, to avoid re-parsing the Types stream. 485 if (Type.kind() == LF_ENDPRECOMP) { 486 EndPrecompRecord EP; 487 if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type), 488 EP)) 489 return joinErrors(std::move(EC), errorCorruptRecord()); 490 if (PCHSignature.hasValue()) 491 return errorCorruptRecord(); 492 PCHSignature.emplace(EP.getSignature()); 493 return false; 494 } 495 return true; 496 } 497