1 //===- DebugTypes.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "DebugTypes.h" 10 #include "COFFLinkerContext.h" 11 #include "Chunks.h" 12 #include "Driver.h" 13 #include "InputFiles.h" 14 #include "PDB.h" 15 #include "TypeMerger.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" 20 #include "llvm/DebugInfo/CodeView/TypeRecord.h" 21 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" 22 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" 23 #include "llvm/DebugInfo/PDB/GenericError.h" 24 #include "llvm/DebugInfo/PDB/Native/InfoStream.h" 25 #include "llvm/DebugInfo/PDB/Native/NativeSession.h" 26 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 27 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" 28 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 29 #include "llvm/Support/FormatVariadic.h" 30 #include "llvm/Support/Parallel.h" 31 #include "llvm/Support/Path.h" 32 33 using namespace llvm; 34 using namespace llvm::codeview; 35 using namespace lld; 36 using namespace lld::coff; 37 38 namespace { 39 class TypeServerIpiSource; 40 41 // The TypeServerSource class represents a PDB type server, a file referenced by 42 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ 43 // files, therefore there must be only once instance per OBJ lot. The file path 44 // is discovered from the dependent OBJ's debug type stream. The 45 // TypeServerSource object is then queued and loaded by the COFF Driver. 
The
// debug type stream for such PDB files will be merged first in the final PDB,
// before any dependent OBJ.
class TypeServerSource : public TpiSource {
public:
  explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
      : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
    if (f->loadErrorStr)
      return;
    pdb::PDBFile &file = f->session->getPDBFile();
    auto expectedInfo = file.getPDBInfoStream();
    if (!expectedInfo)
      return;
    Guid = expectedInfo->getGuid();
    auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
    if (!it.second) {
      // If we hit here we have collision on Guid's in two PDB files.
      // This can happen if the PDB Guid is invalid or if we are really
      // unlucky. This should fall back on straight file-system lookup.
      it.first->second = nullptr;
    }
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

  bool isDependency() const override { return true; }

  PDBInputFile *pdbInputFile = nullptr;

  // TpiSource for IPI stream.
  TypeServerIpiSource *ipiSrc = nullptr;

  // The PDB signature GUID.
  codeview::GUID Guid;
};

// Companion to TypeServerSource. Stores the index map for the IPI stream in the
// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
// invariant of one type index space per source.
class TypeServerIpiSource : public TpiSource {
public:
  explicit TypeServerIpiSource(COFFLinkerContext &ctx)
      : TpiSource(ctx, PDBIpi, nullptr) {}

  friend class TypeServerSource;

  // All of the TpiSource methods are no-ops. The parent TypeServerSource
  // handles both TPI and IPI.
  Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override {}
  bool isDependency() const override { return true; }
};

// This class represents the debug type stream of an OBJ file that depends on a
// PDB type server (see TypeServerSource).
class UseTypeServerSource : public TpiSource {
  Expected<TypeServerSource *> getTypeServerSource();

public:
  UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
      : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}

  Error mergeDebugT(TypeMerger *m) override;

  // No need to load ghashes from /Zi objects.
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override;

  // Information about the PDB type server dependency, that needs to be loaded
  // in before merging this OBJ.
  TypeServer2Record typeServerDependency;
};

// This class represents the debug type stream of a Microsoft precompiled
// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
// PDB, before any other OBJs that depend on this. Note that only MSVC generates
// such files, clang does not.
class PrecompSource : public TpiSource {
public:
  PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
    // If the S_OBJNAME record contains the PCH signature, we'll register this
    // source file right away.
    registerMapping();
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;

  bool isDependency() const override { return true; }

private:
  void registerMapping();

  // Whether this precomp OBJ was recorded in the precompSourceMappings map.
  // Only happens if the file->pchSignature is valid.
  bool registered = false;
};

// This class represents the debug type stream of an OBJ file that depends on a
// Microsoft precompiled headers OBJ (see PrecompSource).
class UsePrecompSource : public TpiSource {
public:
  UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
      : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

private:
  Error mergeInPrecompHeaderObj();

  PrecompSource *findObjByName(StringRef fileNameOnly);
  PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
  Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);

public:
  // Information about the Precomp OBJ dependency, that needs to be loaded in
  // before merging this OBJ.
  PrecompRecord precompDependency;
};
} // namespace

// Every TpiSource is registered with the linker context at construction time,
// and is assigned a stable index into the source list.
TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
    : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
  ctx.addTpiSource(this);
}

// Vtable key method.
TpiSource::~TpiSource() {
  // Silence any assertions about unchecked errors.
  consumeError(std::move(typeMergingError));
}

TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<TpiSource>(ctx, TpiSource::Regular, file);
}

TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
                                           PDBInputFile *pdbInputFile) {
  // Type server sources come in pairs: the TPI stream, and the IPI stream.
  auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
  if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
    tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
  return tpiSource;
}

TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
                                              ObjFile *file,
                                              TypeServer2Record ts) {
  return make<UseTypeServerSource>(ctx, file, ts);
}

TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<PrecompSource>(ctx, file);
}

TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
                                           ObjFile *file,
                                           PrecompRecord precomp) {
  return make<UsePrecompSource>(ctx, file, precomp);
}

// Remap a single type or item index through this source's final index maps.
// Simple (built-in) indices are not remapped. Returns false if the index is
// out of range of the map, i.e. it could not be remapped.
bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
  if (ti.isSimple())
    return true;

  // This can be an item index or a type index. Choose the appropriate map.
  ArrayRef<TypeIndex> tpiOrIpiMap =
      (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
  if (ti.toArrayIndex() >= tpiOrIpiMap.size())
    return false;
  ti = tpiOrIpiMap[ti.toArrayIndex()];
  return true;
}

// Remap, in place, every type index operand of the record `rec` described by
// `typeRefs`. Indices that cannot be remapped are replaced with the simple
// NotTranslated index.
void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
                            ArrayRef<TiReference> typeRefs) {
  MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
  for (const TiReference &ref : typeRefs) {
    unsigned byteSize = ref.Count * sizeof(TypeIndex);
    if (contents.size() < ref.Offset + byteSize)
      fatal("symbol record too short");

    MutableArrayRef<TypeIndex> indices(
        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
    for (TypeIndex &ti : indices) {
      if (!remapTypeIndex(ti, ref.Kind)) {
        if (ctx.config.verbose) {
          uint16_t kind =
              reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
          StringRef fname = file ? file->getName() : "<unknown PDB>";
          log("failed to remap type index in record of kind 0x" +
              utohexstr(kind) + " in " + fname + " with bad " +
              (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
              " index 0x" + utohexstr(ti.getIndex()));
        }
        ti = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
    }
  }
}

void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
  // TODO: Handle errors similar to symbols.
  SmallVector<TiReference, 32> typeRefs;
  discoverTypeIndices(CVType(rec), typeRefs);
  remapRecord(rec, typeRefs);
}

bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
  // Discover type index references in the record. Skip it if we don't
  // know where they are.
  SmallVector<TiReference, 32> typeRefs;
  if (!discoverTypeIndicesInSymbol(rec, typeRefs))
    return false;
  remapRecord(rec, typeRefs);
  return true;
}

// A COFF .debug$H section is currently a clang extension. This function checks
// if a .debug$H section is in a format that we expect / understand, so that we
// can ignore any sections which are coincidentally also named .debug$H but do
// not contain a format we recognize.
static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
  if (debugH.size() < sizeof(object::debug_h_header))
    return false;
  auto *header =
      reinterpret_cast<const object::debug_h_header *>(debugH.data());
  debugH = debugH.drop_front(sizeof(object::debug_h_header));
  // Accept only the magic/version/algorithm combination we know how to read,
  // and require the hash payload to be a whole number of 8-byte hashes.
  return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
         header->Version == 0 &&
         header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
         (debugH.size() % 8 == 0);
}

// Return the contents of the file's .debug$H section, or std::nullopt if the
// section is absent or not in a format we can use.
static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
  SectionChunk *sec =
      SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
  if (!sec)
    return std::nullopt;
  ArrayRef<uint8_t> contents = sec->getContents();
  if (!canUseDebugH(contents))
    return std::nullopt;
  return contents;
}

// View the hash payload of a validated .debug$H section as an array of
// precomputed global type hashes.
static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
  assert(canUseDebugH(debugH));
  debugH = debugH.drop_front(sizeof(object::debug_h_header));
  uint32_t count = debugH.size() / sizeof(GloballyHashedType);
  return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
}

// Merge .debug$T for a generic object file.
Error TpiSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  CVTypeArray types;
  BinaryStreamReader reader(file->debugTypes, support::little);
  cantFail(reader.readArray(types, reader.getLength()));

  // When dealing with PCH.OBJ, some indices were already merged.
  unsigned nbHeadIndices = indexMapStorage.size();

  std::optional<PCHMergerInfo> pchInfo;
  if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
                                       indexMapStorage, types, pchInfo))
    fatal("codeview::mergeTypeAndIdRecords failed: " +
          toString(std::move(err)));
  if (pchInfo) {
    file->pchSignature = pchInfo->PCHSignature;
    endPrecompIdx = pchInfo->EndPrecompIndex;
  }

  // In an object, there is only one mapping for both types and items.
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;

  if (ctx.config.showSummary) {
    nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
    nbTypeRecordsBytes = reader.getLength();
    // Count how many times we saw each type record in our input. This
    // calculation requires a second pass over the type records to classify each
    // record as a type or index. This is slow, but this code executes when
    // collecting statistics.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    uint32_t srcIdx = nbHeadIndices;
    for (const CVType &ty : types) {
      TypeIndex dstIdx = tpiMap[srcIdx++];
      // Type merging may fail, so a complex source type may become the simple
      // NotTranslated type, which cannot be used as an array index.
      if (dstIdx.isSimple())
        continue;
      SmallVectorImpl<uint32_t> &counts =
          isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
      ++counts[dstIdx.toArrayIndex()];
    }
  }

  return Error::success();
}

// Merge types from a type server PDB.
359 Error TypeServerSource::mergeDebugT(TypeMerger *m) { 360 assert(!ctx.config.debugGHashes && 361 "use remapTpiWithGHashes when ghash is enabled"); 362 363 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); 364 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream(); 365 if (auto e = expectedTpi.takeError()) 366 fatal("Type server does not have TPI stream: " + toString(std::move(e))); 367 pdb::TpiStream *maybeIpi = nullptr; 368 if (pdbFile.hasPDBIpiStream()) { 369 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream(); 370 if (auto e = expectedIpi.takeError()) 371 fatal("Error getting type server IPI stream: " + toString(std::move(e))); 372 maybeIpi = &*expectedIpi; 373 } 374 375 // Merge TPI first, because the IPI stream will reference type indices. 376 if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, 377 expectedTpi->typeArray())) 378 fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); 379 tpiMap = indexMapStorage; 380 381 // Merge IPI. 382 if (maybeIpi) { 383 if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, 384 maybeIpi->typeArray())) 385 fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); 386 ipiMap = ipiSrc->indexMapStorage; 387 } 388 389 if (ctx.config.showSummary) { 390 nbTypeRecords = tpiMap.size() + ipiMap.size(); 391 nbTypeRecordsBytes = 392 expectedTpi->typeArray().getUnderlyingStream().getLength() + 393 (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength() 394 : 0); 395 396 // Count how many times we saw each type record in our input. If a 397 // destination type index is present in the source to destination type index 398 // map, that means we saw it once in the input. Add it to our histogram. 
399 m->tpiCounts.resize(m->getTypeTable().size()); 400 m->ipiCounts.resize(m->getIDTable().size()); 401 for (TypeIndex ti : tpiMap) 402 if (!ti.isSimple()) 403 ++m->tpiCounts[ti.toArrayIndex()]; 404 for (TypeIndex ti : ipiMap) 405 if (!ti.isSimple()) 406 ++m->ipiCounts[ti.toArrayIndex()]; 407 } 408 409 return Error::success(); 410 } 411 412 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() { 413 const codeview::GUID &tsId = typeServerDependency.getGuid(); 414 StringRef tsPath = typeServerDependency.getName(); 415 416 TypeServerSource *tsSrc = nullptr; 417 auto it = ctx.typeServerSourceMappings.find(tsId); 418 if (it != ctx.typeServerSourceMappings.end()) { 419 tsSrc = (TypeServerSource *)it->second; 420 } 421 if (tsSrc == nullptr) { 422 // The file failed to load, lookup by name 423 PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file); 424 if (!pdb) 425 return createFileError(tsPath, errorCodeToError(std::error_code( 426 ENOENT, std::generic_category()))); 427 // If an error occurred during loading, throw it now 428 if (pdb->loadErrorStr) 429 return createFileError( 430 tsPath, make_error<StringError>(*pdb->loadErrorStr, 431 llvm::inconvertibleErrorCode())); 432 433 tsSrc = (TypeServerSource *)pdb->debugTypesObj; 434 435 // Just because a file with a matching name was found and it was an actual 436 // PDB file doesn't mean it matches. For it to match the InfoStream's GUID 437 // must match the GUID specified in the TypeServer2 record. 
438 if (tsSrc->Guid != tsId) { 439 return createFileError(tsPath, 440 make_error<pdb::PDBError>( 441 pdb::pdb_error_code::signature_out_of_date)); 442 } 443 } 444 return tsSrc; 445 } 446 447 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { 448 Expected<TypeServerSource *> tsSrc = getTypeServerSource(); 449 if (!tsSrc) 450 return tsSrc.takeError(); 451 452 pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile(); 453 auto expectedInfo = pdbSession.getPDBInfoStream(); 454 if (!expectedInfo) 455 return expectedInfo.takeError(); 456 457 // Reuse the type index map of the type server. 458 tpiMap = (*tsSrc)->tpiMap; 459 ipiMap = (*tsSrc)->ipiMap; 460 return Error::success(); 461 } 462 463 static bool equalsPath(StringRef path1, StringRef path2) { 464 #if defined(_WIN32) 465 return path1.equals_insensitive(path2); 466 #else 467 return path1.equals(path2); 468 #endif 469 } 470 471 // Find by name an OBJ provided on the command line 472 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) { 473 SmallString<128> currentPath; 474 for (auto kv : ctx.precompSourceMappings) { 475 StringRef currentFileName = sys::path::filename(kv.second->file->getName(), 476 sys::path::Style::windows); 477 478 // Compare based solely on the file name (link.exe behavior) 479 if (equalsPath(currentFileName, fileNameOnly)) 480 return (PrecompSource *)kv.second; 481 } 482 return nullptr; 483 } 484 485 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file, 486 PrecompRecord &pr) { 487 // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP 488 // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, 489 // the paths embedded in the OBJs are in the Windows format. 
490 SmallString<128> prFileName = 491 sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows); 492 493 auto it = ctx.precompSourceMappings.find(pr.getSignature()); 494 if (it != ctx.precompSourceMappings.end()) { 495 return (PrecompSource *)it->second; 496 } 497 // Lookup by name 498 return findObjByName(prFileName); 499 } 500 501 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file, 502 PrecompRecord &pr) { 503 PrecompSource *precomp = findPrecompSource(file, pr); 504 505 if (!precomp) 506 return createFileError( 507 pr.getPrecompFilePath(), 508 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch)); 509 510 // Don't rely on the PCH signature to validate the concordance between the PCH 511 // and the OBJ that uses it. However we do validate here that the 512 // LF_ENDPRECOMP record index lines up with the number of type records 513 // LF_PRECOMP is expecting. 514 if (precomp->endPrecompIdx != pr.getTypesCount()) 515 return createFileError( 516 toString(file), 517 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch)); 518 519 return precomp; 520 } 521 522 /// Merges a precompiled headers TPI map into the current TPI map. The 523 /// precompiled headers object will also be loaded and remapped in the 524 /// process. 525 Error UsePrecompSource::mergeInPrecompHeaderObj() { 526 auto e = findPrecompMap(file, precompDependency); 527 if (!e) 528 return e.takeError(); 529 530 PrecompSource *precompSrc = *e; 531 if (precompSrc->tpiMap.empty()) 532 return Error::success(); 533 534 assert(precompDependency.getStartTypeIndex() == 535 TypeIndex::FirstNonSimpleIndex); 536 assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size()); 537 // Use the previously remapped index map from the precompiled headers. 
538 indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(), 539 precompSrc->tpiMap.begin() + 540 precompDependency.getTypesCount()); 541 542 return Error::success(); 543 } 544 545 Error UsePrecompSource::mergeDebugT(TypeMerger *m) { 546 // This object was compiled with /Yu, so process the corresponding 547 // precompiled headers object (/Yc) first. Some type indices in the current 548 // object are referencing data in the precompiled headers object, so we need 549 // both to be loaded. 550 if (Error e = mergeInPrecompHeaderObj()) 551 return e; 552 553 return TpiSource::mergeDebugT(m); 554 } 555 556 Error PrecompSource::mergeDebugT(TypeMerger *m) { 557 // In some cases, the S_OBJNAME record doesn't contain the PCH signature. 558 // The signature comes later with the LF_ENDPRECOMP record, so we first need 559 // to merge in all the .PCH.OBJ file type records, before registering below. 560 if (Error e = TpiSource::mergeDebugT(m)) 561 return e; 562 563 registerMapping(); 564 565 return Error::success(); 566 } 567 568 void PrecompSource::registerMapping() { 569 if (registered) 570 return; 571 if (file->pchSignature && *file->pchSignature) { 572 auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this); 573 if (!it.second) 574 fatal("a PCH object with the same signature has already been provided (" + 575 toString(it.first->second->file) + " and " + toString(file) + ")"); 576 registered = true; 577 } 578 } 579 580 //===----------------------------------------------------------------------===// 581 // Parellel GHash type merging implementation. 
582 //===----------------------------------------------------------------------===// 583 584 void TpiSource::loadGHashes() { 585 if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) { 586 ghashes = getHashesFromDebugH(*debugH); 587 ownedGHashes = false; 588 } else { 589 CVTypeArray types; 590 BinaryStreamReader reader(file->debugTypes, support::little); 591 cantFail(reader.readArray(types, reader.getLength())); 592 assignGHashesFromVector(GloballyHashedType::hashTypes(types)); 593 } 594 595 fillIsItemIndexFromDebugT(); 596 } 597 598 // Copies ghashes from a vector into an array. These are long lived, so it's 599 // worth the time to copy these into an appropriately sized vector to reduce 600 // memory usage. 601 void TpiSource::assignGHashesFromVector( 602 std::vector<GloballyHashedType> &&hashVec) { 603 if (hashVec.empty()) 604 return; 605 GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()]; 606 memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType)); 607 ghashes = ArrayRef(hashes, hashVec.size()); 608 ownedGHashes = true; 609 } 610 611 // Faster way to iterate type records. forEachTypeChecked is faster than 612 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops. 613 static void forEachTypeChecked(ArrayRef<uint8_t> types, 614 function_ref<void(const CVType &)> fn) { 615 checkError( 616 forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error { 617 fn(ty); 618 return Error::success(); 619 })); 620 } 621 622 // Walk over file->debugTypes and fill in the isItemIndex bit vector. 623 // TODO: Store this information in .debug$H so that we don't have to recompute 624 // it. This is the main bottleneck slowing down parallel ghashing with one 625 // thread over single-threaded ghashing. 
626 void TpiSource::fillIsItemIndexFromDebugT() { 627 uint32_t index = 0; 628 isItemIndex.resize(ghashes.size()); 629 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { 630 if (isIdRecord(ty.kind())) 631 isItemIndex.set(index); 632 ++index; 633 }); 634 } 635 636 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) { 637 // Decide if the merged type goes into TPI or IPI. 638 bool isItem = isIdRecord(ty.kind()); 639 MergedInfo &merged = isItem ? mergedIpi : mergedTpi; 640 641 // Copy the type into our mutable buffer. 642 assert(ty.length() <= codeview::MaxRecordLength); 643 size_t offset = merged.recs.size(); 644 size_t newSize = alignTo(ty.length(), 4); 645 merged.recs.resize(offset + newSize); 646 auto newRec = MutableArrayRef(&merged.recs[offset], newSize); 647 memcpy(newRec.data(), ty.data().data(), newSize); 648 649 // Fix up the record prefix and padding bytes if it required resizing. 650 if (newSize != ty.length()) { 651 reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2; 652 for (size_t i = ty.length(); i < newSize; ++i) 653 newRec[i] = LF_PAD0 + (newSize - i); 654 } 655 656 // Remap the type indices in the new record. 657 remapTypesInTypeRecord(newRec); 658 uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec))); 659 merged.recSizes.push_back(static_cast<uint16_t>(newSize)); 660 merged.recHashes.push_back(pdbHash); 661 662 // Retain a mapping from PDB function id to PDB function type. This mapping is 663 // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32 664 // symbols. 665 if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { 666 bool success = ty.length() >= 12; 667 TypeIndex funcId = curIndex; 668 if (success) 669 success &= remapTypeIndex(funcId, TiRefKind::IndexRef); 670 TypeIndex funcType = 671 *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]); 672 if (success) { 673 funcIdToType.push_back({funcId, funcType}); 674 } else { 675 StringRef fname = file ? 
file->getName() : "<unknown PDB>"; 676 warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) + 677 " in " + fname); 678 } 679 } 680 } 681 682 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords, 683 TypeIndex beginIndex) { 684 // Re-sort the list of unique types by index. 685 if (kind == PDB) 686 assert(llvm::is_sorted(uniqueTypes)); 687 else 688 llvm::sort(uniqueTypes); 689 690 // Accumulate all the unique types into one buffer in mergedTypes. 691 uint32_t ghashIndex = 0; 692 auto nextUniqueIndex = uniqueTypes.begin(); 693 assert(mergedTpi.recs.empty()); 694 assert(mergedIpi.recs.empty()); 695 696 // Pre-compute the number of elements in advance to avoid std::vector resizes. 697 unsigned nbTpiRecs = 0; 698 unsigned nbIpiRecs = 0; 699 forEachTypeChecked(typeRecords, [&](const CVType &ty) { 700 if (nextUniqueIndex != uniqueTypes.end() && 701 *nextUniqueIndex == ghashIndex) { 702 assert(ty.length() <= codeview::MaxRecordLength); 703 size_t newSize = alignTo(ty.length(), 4); 704 (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize; 705 ++nextUniqueIndex; 706 } 707 ++ghashIndex; 708 }); 709 mergedTpi.recs.reserve(nbTpiRecs); 710 mergedIpi.recs.reserve(nbIpiRecs); 711 712 // Do the actual type merge. 
713 ghashIndex = 0; 714 nextUniqueIndex = uniqueTypes.begin(); 715 forEachTypeChecked(typeRecords, [&](const CVType &ty) { 716 if (nextUniqueIndex != uniqueTypes.end() && 717 *nextUniqueIndex == ghashIndex) { 718 mergeTypeRecord(beginIndex + ghashIndex, ty); 719 ++nextUniqueIndex; 720 } 721 ++ghashIndex; 722 }); 723 assert(nextUniqueIndex == uniqueTypes.end() && 724 "failed to merge all desired records"); 725 assert(uniqueTypes.size() == 726 mergedTpi.recSizes.size() + mergedIpi.recSizes.size() && 727 "missing desired record"); 728 } 729 730 void TpiSource::remapTpiWithGHashes(GHashState *g) { 731 assert(ctx.config.debugGHashes && "ghashes must be enabled"); 732 fillMapFromGHashes(g); 733 tpiMap = indexMapStorage; 734 ipiMap = indexMapStorage; 735 mergeUniqueTypeRecords(file->debugTypes); 736 // TODO: Free all unneeded ghash resources now that we have a full index map. 737 738 if (ctx.config.showSummary) { 739 nbTypeRecords = ghashes.size(); 740 nbTypeRecordsBytes = file->debugTypes.size(); 741 } 742 } 743 744 // PDBs do not actually store global hashes, so when merging a type server 745 // PDB we have to synthesize global hashes. To do this, we first synthesize 746 // global hashes for the TPI stream, since it is independent, then we 747 // synthesize hashes for the IPI stream, using the hashes for the TPI stream 748 // as inputs. 749 void TypeServerSource::loadGHashes() { 750 // Don't hash twice. 751 if (!ghashes.empty()) 752 return; 753 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); 754 755 // Hash TPI stream. 756 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream(); 757 if (auto e = expectedTpi.takeError()) 758 fatal("Type server does not have TPI stream: " + toString(std::move(e))); 759 assignGHashesFromVector( 760 GloballyHashedType::hashTypes(expectedTpi->typeArray())); 761 isItemIndex.resize(ghashes.size()); 762 763 // Hash IPI stream, which depends on TPI ghashes. 
764 if (!pdbFile.hasPDBIpiStream()) 765 return; 766 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream(); 767 if (auto e = expectedIpi.takeError()) 768 fatal("error retrieving IPI stream: " + toString(std::move(e))); 769 ipiSrc->assignGHashesFromVector( 770 GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes)); 771 772 // The IPI stream isItemIndex bitvector should be all ones. 773 ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size()); 774 ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size()); 775 } 776 777 // Flatten discontiguous PDB type arrays to bytes so that we can use 778 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from 779 // type servers is faster than iterating all object files compiled with /Z7 with 780 // CVTypeArray, which has high overheads due to the virtual interface of 781 // BinaryStream::readBytes. 782 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) { 783 BinaryStreamRef stream = types.getUnderlyingStream(); 784 ArrayRef<uint8_t> debugTypes; 785 checkError(stream.readBytes(0, stream.getLength(), debugTypes)); 786 return debugTypes; 787 } 788 789 // Merge types from a type server PDB. 790 void TypeServerSource::remapTpiWithGHashes(GHashState *g) { 791 assert(ctx.config.debugGHashes && "ghashes must be enabled"); 792 793 // IPI merging depends on TPI, so do TPI first, then do IPI. No need to 794 // propagate errors, those should've been handled during ghash loading. 
  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
  fillMapFromGHashes(g);
  tpiMap = indexMapStorage;
  mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
  if (pdbFile.hasPDBIpiStream()) {
    pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
    ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
    ipiSrc->fillMapFromGHashes(g);
    ipiMap = ipiSrc->indexMapStorage;
    // Give the companion IPI source both maps so it can remap records that
    // reference either index space.
    ipiSrc->tpiMap = tpiMap;
    ipiSrc->ipiMap = ipiMap;
    ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));

    if (ctx.config.showSummary) {
      nbTypeRecords = ipiSrc->ghashes.size();
      nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
    }
  }

  if (ctx.config.showSummary) {
    // Accumulate the TPI counts on top of any IPI counts recorded above.
    nbTypeRecords += ghashes.size();
    nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
  }
}

void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
  // No remapping to do with /Zi objects. Simply use the index map from the type
  // server. Errors should have been reported earlier. Symbols from this object
  // will be ignored.
  Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
  if (!maybeTsSrc) {
    typeMergingError =
        joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
    return;
  }
  TypeServerSource *tsSrc = *maybeTsSrc;
  tpiMap = tsSrc->tpiMap;
  ipiMap = tsSrc->ipiMap;
}

// Compute ghashes for a /Yc PCH object. The LF_ENDPRECOMP record marks the end
// of the precompiled types and carries the PCH signature.
void PrecompSource::loadGHashes() {
  if (getDebugH(file)) {
    warn("ignoring .debug$H section; pch with ghash is not implemented");
  }

  uint32_t ghashIdx = 0;
  std::vector<GloballyHashedType> hashVec;
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
    // the PDB. There must be an entry in the list of ghashes so that the type
    // indexes of the following records in the /Yc PCH object line up.
    if (ty.kind() == LF_ENDPRECOMP) {
      EndPrecompRecord endPrecomp;
      cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
          const_cast<CVType &>(ty), endPrecomp));
      file->pchSignature = endPrecomp.getSignature();
      registerMapping();
      endPrecompIdx = ghashIdx;
    }

    // hashVec doubles as both the previous-type and previous-id context for
    // hashType, so records can be hashed by the content they reference.
    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
    ++ghashIdx;
  });
  assignGHashesFromVector(std::move(hashVec));
}

void UsePrecompSource::loadGHashes() {
  auto e = findPrecompMap(file, precompDependency);
  if (!e) {
    warn(toString(e.takeError()));
    return;
  }

  PrecompSource *pchSrc = *e;

  // To compute ghashes of a /Yu object file, we need to build on the ghashes of
  // the /Yc PCH object. After we are done hashing, discard the ghashes from the
  // PCH source so we don't unnecessarily try to deduplicate them.
  std::vector<GloballyHashedType> hashVec =
      pchSrc->ghashes.take_front(precompDependency.getTypesCount());
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
  });
  // Drop the borrowed PCH ghashes; keep only this object's own hashes.
  hashVec.erase(hashVec.begin(),
                hashVec.begin() + precompDependency.getTypesCount());
  assignGHashesFromVector(std::move(hashVec));
}

void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
  fillMapFromGHashes(g);
  // This object was compiled with /Yu, so process the corresponding
  // precompiled headers object (/Yc) first. Some type indices in the current
  // object are referencing data in the precompiled headers object, so we need
  // both to be loaded.
  if (Error e = mergeInPrecompHeaderObj()) {
    typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
    return;
  }

  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;
  // Merge only this object's own records: start past the PCH range, whose
  // first index after the PCH types is start index + type count.
  mergeUniqueTypeRecords(file->debugTypes,
                         TypeIndex(precompDependency.getStartTypeIndex() +
                                   precompDependency.getTypesCount()));
  if (ctx.config.showSummary) {
    nbTypeRecords = ghashes.size();
    nbTypeRecordsBytes = file->debugTypes.size();
  }
}

namespace {
/// A concurrent hash table for global type hashing. It is based on this paper:
/// Concurrent Hash Tables: Fast and General(?)!
/// https://dl.acm.org/doi/10.1145/3309206
///
/// This hash table is meant to be used in two phases:
/// 1. concurrent insertions
/// 2. concurrent reads
/// It does not support lookup, deletion, or rehashing. It uses linear probing.
///
/// The paper describes storing a key-value pair in two machine words.
/// Generally, the values stored in this map are type indices, and we can use
/// those values to recover the ghash key from a side table. This allows us to
/// shrink the table entries further at the cost of some loads, and sidesteps
/// the need for a 128 bit atomic compare-and-swap operation.
///
/// During insertion, a priority function is used to decide which insertion
/// should be preferred. This ensures that the output is deterministic. For
/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
///
class GHashCell;
struct GHashTable {
  GHashCell *table = nullptr;
  uint32_t tableSize = 0;

  GHashTable() = default;
  ~GHashTable();

  /// Initialize the table with the given size. Because the table cannot be
  /// resized, the initial size of the table must be large enough to contain all
  /// inputs, or insertion may not be able to find an empty cell.
  void init(uint32_t newTableSize);

  /// Insert the cell with the given ghash into the table. Return the insertion
  /// position in the table. It is safe for the caller to store the insertion
  /// position because the table cannot be resized.
  uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                  GHashCell newCell);
};

/// A ghash table cell for deduplicating types from TpiSources.
///
/// Bit layout of `data`, most to least significant:
///   [63]    isItem
///   [62:32] tpiSrcIdx + 1 (biased so a used cell is never all zeros)
///   [31:0]  ghashIdx
class GHashCell {
  // Force "data" to be 64-bit aligned; otherwise, some versions of clang
  // will generate calls to libatomic when using some versions of libstdc++
  // on 32-bit targets. (Also, in theory, there could be a target where
  // new[] doesn't always return an 8-byte-aligned allocation.)
  alignas(sizeof(uint64_t)) uint64_t data = 0;

public:
  GHashCell() = default;

  // Construct data most to least significant so that sorting works well:
  // - isItem
  // - tpiSrcIdx
  // - ghashIdx
  // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
  // non-zero representation.
  GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
      : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
             ghashIdx) {
    // The asserts catch field overflow (tpiSrcIdx wider than 31 bits, etc).
    assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
    assert(ghashIdx == getGHashIdx() && "round trip failure");
  }

  explicit GHashCell(uint64_t data) : data(data) {}

  // The empty cell is all zeros.
  bool isEmpty() const { return data == 0ULL; }

  /// Extract the tpiSrcIdx, undoing the +1 bias applied at construction.
  uint32_t getTpiSrcIdx() const {
    return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
  }

  /// Extract the index into the ghash array of the TpiSource.
  uint32_t getGHashIdx() const { return (uint32_t)data; }

  /// True if this cell holds an item (id) record rather than a type record.
  bool isItem() const { return data & (1ULL << 63U); }

  /// Get the ghash key for this cell.
  GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
    return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
  }

  /// The priority function for the cell. The data is stored such that lower
  /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
  /// from earlier sources are more likely to prevail.
  friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
    return l.data < r.data;
  }
};
} // namespace

namespace lld::coff {
/// This type is just a wrapper around GHashTable with external linkage so it
/// can be used from a header.
struct GHashState {
  GHashTable table;
};
} // namespace lld::coff

GHashTable::~GHashTable() { delete[] table; }

void GHashTable::init(uint32_t newTableSize) {
  // Note: GHashCell default-constructs `data` to zero already, so the memset
  // is redundant but harmless; it makes the all-zero "empty" state explicit.
  table = new GHashCell[newTableSize];
  memset(table, 0, newTableSize * sizeof(GHashCell));
  tableSize = newTableSize;
}

uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                            GHashCell newCell) {
  assert(!newCell.isEmpty() && "cannot insert empty cell value");

  // FIXME: The low bytes of SHA1 have low entropy for short records, which
  // type records are. Swap the byte order for better entropy. A better ghash
  // won't need this.
  uint32_t startIdx =
      llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
      tableSize;

  // Do a linear probe starting at startIdx.
  uint32_t idx = startIdx;
  while (true) {
    // Run a compare and swap loop. There are four cases:
    // - cell is empty: CAS into place and return
    // - cell has matching key, earlier priority: do nothing, return
    // - cell has matching key, later priority: CAS into place and return
    // - cell has non-matching key: hash collision, probe next cell
    auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
    GHashCell oldCell(cellPtr->load());
    while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
      // Check if there is an existing ghash entry with a higher priority
      // (earlier ordering). If so, this is a duplicate, we are done.
      if (!oldCell.isEmpty() && oldCell < newCell)
        return idx;
      // Either the cell is empty, or our value is higher priority. Try to
      // compare and swap. If it succeeds, we are done.
      if (cellPtr->compare_exchange_weak(oldCell, newCell))
        return idx;
      // If the CAS failed, check this cell again. compare_exchange_weak
      // reloads oldCell with the current cell value on failure.
    }

    // Advance the probe. Wrap around to the beginning if we run off the end.
    ++idx;
    idx = idx == tableSize ? 0 : idx;
    if (idx == startIdx) {
      // If this becomes an issue, we could mark failure and rehash from the
      // beginning with a bigger table. There is no difference between rehashing
      // internally and starting over.
      report_fatal_error("ghash table is full");
    }
  }
  llvm_unreachable("left infloop");
}

TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
    : typeTable(alloc), idTable(alloc), ctx(c) {}

TypeMerger::~TypeMerger() = default;

void TypeMerger::mergeTypesWithGHash() {
  // Load ghashes. Do type servers and PCH objects first.
  {
    ScopedTimer t1(ctx.loadGHashTimer);
    parallelForEach(dependencySources,
                    [&](TpiSource *source) { source->loadGHashes(); });
    parallelForEach(objectSources,
                    [&](TpiSource *source) { source->loadGHashes(); });
  }

  ScopedTimer t2(ctx.mergeGHashTimer);
  GHashState ghashState;

  // Estimate the size of hash table needed to deduplicate ghashes. This *must*
  // be larger than the number of unique types, or hash table insertion may not
  // be able to find a vacant slot. Summing the input types guarantees this, but
  // it is a gross overestimate. The table size could be reduced to save memory,
  // but it would require implementing rehashing, and this table is generally
  // small compared to total memory usage, at eight bytes per input type record,
  // and most input type records are larger than eight bytes.
  size_t tableSize = 0;
  for (TpiSource *source : ctx.tpiSourceList)
    tableSize += source->ghashes.size();

  // Cap the table size so that we can use 32-bit cell indices. Type indices are
  // also 32-bit, so this is an inherent PDB file format limit anyway.
  tableSize =
      std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
  ghashState.table.init(static_cast<uint32_t>(tableSize));

  // Insert ghashes in parallel. During concurrent insertion, we cannot observe
  // the contents of the hash table cell, but we can remember the insertion
  // position. Because the table does not rehash, the position will not change
  // under insertion. After insertion is done, the value of the cell can be read
  // to retrieve the final PDB type index.
  parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->indexMapStorage.resize(source->ghashes.size());
    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
      // Records excluded from the PDB (e.g. dropped by the source) map to a
      // sentinel simple index instead of a ghash cell.
      if (source->shouldOmitFromPdb(i)) {
        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
      GloballyHashedType ghash = source->ghashes[i];
      bool isItem = source->isItemIndex.test(i);
      uint32_t cellIdx =
          ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));

      // Store the ghash cell index as a type index in indexMapStorage. Later
      // we will replace it with the PDB type index.
      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
    }
  });

  // Collect all non-empty cells and sort them. This will implicitly assign
  // destination type indices, and partition the entries into type records and
  // item records. It arranges types in this order:
  // - type records
  //   - source 0, type 0...
  //   - source 1, type 1...
  // - item records
  //   - source 0, type 1...
  //   - source 1, type 0...
  std::vector<GHashCell> entries;
  for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
    if (!cell.isEmpty())
      entries.push_back(cell);
  }
  parallelSort(entries, std::less<GHashCell>());
  log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
              tableSize ? double(entries.size()) / tableSize : 0,
              entries.size(), tableSize));

  // Find out how many type and item indices there are. The isItem bit is the
  // most significant bit of the cell, so lower_bound on a zero-index item cell
  // finds the type/item partition point.
  auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
  assert((mid == entries.end() || mid->isItem()) &&
         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
         "midpoint is not midpoint");
  uint32_t numTypes = std::distance(entries.begin(), mid);
  uint32_t numItems = std::distance(mid, entries.end());
  log("Tpi record count: " + Twine(numTypes));
  log("Ipi record count: " + Twine(numItems));

  // Make a list of the "unique" type records to merge for each tpi source. Type
  // merging will skip indices not on this list. Store the destination PDB type
  // index for these unique types in the tpiMap for each source. The entries for
  // non-unique types will be filled in prior to type merging.
  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
    auto &cell = entries[i];
    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->uniqueTypes.push_back(cell.getGHashIdx());

    // Update the ghash table to store the destination PDB type index in the
    // table. Item records restart their numbering after the type records.
    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
    uint32_t ghashCellIndex =
        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
    ghashState.table.table[ghashCellIndex] =
        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
  }

  // Remap all types. Dependency sources are remapped first and serially:
  // object sources (remapped in parallel below) read the index maps of their
  // dependencies, e.g. /Zi objects copy the type server's tpiMap/ipiMap.
  for (TpiSource *source : dependencySources)
    source->remapTpiWithGHashes(&ghashState);
  parallelForEach(objectSources, [&](TpiSource *source) {
    source->remapTpiWithGHashes(&ghashState);
  });

  // Build a global map from function ID to function type.
  for (TpiSource *source : ctx.tpiSourceList) {
    for (auto idToType : source->funcIdToType)
      funcIdToType.insert(idToType);
    // Release the per-source map; the global map now owns the entries.
    source->funcIdToType.clear();
  }

  clearGHashes();
}

void TypeMerger::sortDependencies() {
  // Order dependencies first, but preserve the existing order.
  std::vector<TpiSource *> deps;
  std::vector<TpiSource *> objs;
  for (TpiSource *s : ctx.tpiSourceList)
    (s->isDependency() ? deps : objs).push_back(s);
  uint32_t numDeps = deps.size();
  uint32_t numObjs = objs.size();
  ctx.tpiSourceList = std::move(deps);
  ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
  // Re-number the sources to reflect the new order; tpiSrcIdx is packed into
  // ghash cells, so it must match the position in tpiSourceList.
  for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
    ctx.tpiSourceList[i]->tpiSrcIdx = i;
  dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
  objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
}

/// Given the index into the ghash table for a particular type, return the type
/// index for that type in the output PDB.
static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
                                          uint32_t ghashCellIdx) {
  // After mergeTypesWithGHash rewrites the table, the cell's ghashIdx field
  // holds the destination PDB type index.
  GHashCell cell = g->table.table[ghashCellIdx];
  return TypeIndex::fromArrayIndex(cell.getGHashIdx());
}

/// Free heap allocated ghashes.
void TypeMerger::clearGHashes() {
  for (TpiSource *src : ctx.tpiSourceList) {
    // Only sources that own their ghash storage free it; others view memory
    // owned elsewhere (e.g. a .debug$H section).
    if (src->ownedGHashes)
      delete[] src->ghashes.data();
    src->ghashes = {};
    src->isItemIndex.clear();
    src->uniqueTypes.clear();
  }
}

// Fill in a TPI or IPI index map using ghashes. For each source type, use its
// ghash to lookup its final type index in the PDB, and store that in the map.
1224 void TpiSource::fillMapFromGHashes(GHashState *g) { 1225 for (size_t i = 0, e = ghashes.size(); i < e; ++i) { 1226 TypeIndex fakeCellIndex = indexMapStorage[i]; 1227 if (fakeCellIndex.isSimple()) 1228 indexMapStorage[i] = fakeCellIndex; 1229 else 1230 indexMapStorage[i] = 1231 loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex()); 1232 } 1233 } 1234