1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "DebugTypes.h"
10 #include "Chunks.h"
11 #include "Driver.h"
12 #include "InputFiles.h"
13 #include "PDB.h"
14 #include "TypeMerger.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "lld/Common/Timer.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22 #include "llvm/DebugInfo/PDB/GenericError.h"
23 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/Parallel.h"
30 #include "llvm/Support/Path.h"
31
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace lld;
35 using namespace lld::coff;
36
37 namespace {
38 class TypeServerIpiSource;
39
40 // The TypeServerSource class represents a PDB type server, a file referenced by
41 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
42 // files, therefore there must be only once instance per OBJ lot. The file path
43 // is discovered from the dependent OBJ's debug type stream. The
44 // TypeServerSource object is then queued and loaded by the COFF Driver. The
45 // debug type stream for such PDB files will be merged first in the final PDB,
46 // before any dependent OBJ.
47 class TypeServerSource : public TpiSource {
48 public:
TypeServerSource(PDBInputFile * f)49 explicit TypeServerSource(PDBInputFile *f)
50 : TpiSource(PDB, nullptr), pdbInputFile(f) {
51 if (f->loadErr && *f->loadErr)
52 return;
53 pdb::PDBFile &file = f->session->getPDBFile();
54 auto expectedInfo = file.getPDBInfoStream();
55 if (!expectedInfo)
56 return;
57 Guid = expectedInfo->getGuid();
58 auto it = mappings.emplace(Guid, this);
59 assert(it.second);
60 (void)it;
61 }
62
63 Error mergeDebugT(TypeMerger *m) override;
64
65 void loadGHashes() override;
66 void remapTpiWithGHashes(GHashState *g) override;
67
isDependency() const68 bool isDependency() const override { return true; }
69
70 PDBInputFile *pdbInputFile = nullptr;
71
72 // TpiSource for IPI stream.
73 TypeServerIpiSource *ipiSrc = nullptr;
74
75 // The PDB signature GUID.
76 codeview::GUID Guid;
77
78 static std::map<codeview::GUID, TypeServerSource *> mappings;
79 };
80
81 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
82 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
83 // invariant of one type index space per source.
84 class TypeServerIpiSource : public TpiSource {
85 public:
TypeServerIpiSource()86 explicit TypeServerIpiSource() : TpiSource(PDBIpi, nullptr) {}
87
88 friend class TypeServerSource;
89
90 // All of the TpiSource methods are no-ops. The parent TypeServerSource
91 // handles both TPI and IPI.
mergeDebugT(TypeMerger * m)92 Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
loadGHashes()93 void loadGHashes() override {}
remapTpiWithGHashes(GHashState * g)94 void remapTpiWithGHashes(GHashState *g) override {}
isDependency() const95 bool isDependency() const override { return true; }
96 };
97
98 // This class represents the debug type stream of an OBJ file that depends on a
99 // PDB type server (see TypeServerSource).
100 class UseTypeServerSource : public TpiSource {
101 Expected<TypeServerSource *> getTypeServerSource();
102
103 public:
UseTypeServerSource(ObjFile * f,TypeServer2Record ts)104 UseTypeServerSource(ObjFile *f, TypeServer2Record ts)
105 : TpiSource(UsingPDB, f), typeServerDependency(ts) {}
106
107 Error mergeDebugT(TypeMerger *m) override;
108
109 // No need to load ghashes from /Zi objects.
loadGHashes()110 void loadGHashes() override {}
111 void remapTpiWithGHashes(GHashState *g) override;
112
113 // Information about the PDB type server dependency, that needs to be loaded
114 // in before merging this OBJ.
115 TypeServer2Record typeServerDependency;
116 };
117
118 // This class represents the debug type stream of a Microsoft precompiled
119 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
120 // PDB, before any other OBJs that depend on this. Note that only MSVC generate
121 // such files, clang does not.
122 class PrecompSource : public TpiSource {
123 public:
PrecompSource(ObjFile * f)124 PrecompSource(ObjFile *f) : TpiSource(PCH, f) {
125 if (!f->pchSignature || !*f->pchSignature)
126 fatal(toString(f) +
127 " claims to be a PCH object, but does not have a valid signature");
128 auto it = mappings.emplace(*f->pchSignature, this);
129 if (!it.second)
130 fatal("a PCH object with the same signature has already been provided (" +
131 toString(it.first->second->file) + " and " + toString(file) + ")");
132 }
133
134 void loadGHashes() override;
135
isDependency() const136 bool isDependency() const override { return true; }
137
138 static std::map<uint32_t, PrecompSource *> mappings;
139 };
140
141 // This class represents the debug type stream of an OBJ file that depends on a
142 // Microsoft precompiled headers OBJ (see PrecompSource).
143 class UsePrecompSource : public TpiSource {
144 public:
UsePrecompSource(ObjFile * f,PrecompRecord precomp)145 UsePrecompSource(ObjFile *f, PrecompRecord precomp)
146 : TpiSource(UsingPCH, f), precompDependency(precomp) {}
147
148 Error mergeDebugT(TypeMerger *m) override;
149
150 void loadGHashes() override;
151 void remapTpiWithGHashes(GHashState *g) override;
152
153 private:
154 Error mergeInPrecompHeaderObj();
155
156 public:
157 // Information about the Precomp OBJ dependency, that needs to be loaded in
158 // before merging this OBJ.
159 PrecompRecord precompDependency;
160 };
161 } // namespace
162
163 std::vector<TpiSource *> TpiSource::instances;
164 ArrayRef<TpiSource *> TpiSource::dependencySources;
165 ArrayRef<TpiSource *> TpiSource::objectSources;
166
TpiSource(TpiKind k,ObjFile * f)167 TpiSource::TpiSource(TpiKind k, ObjFile *f)
168 : kind(k), tpiSrcIdx(instances.size()), file(f) {
169 instances.push_back(this);
170 }
171
172 // Vtable key method.
~TpiSource()173 TpiSource::~TpiSource() {
174 // Silence any assertions about unchecked errors.
175 consumeError(std::move(typeMergingError));
176 }
177
sortDependencies()178 void TpiSource::sortDependencies() {
179 // Order dependencies first, but preserve the existing order.
180 std::vector<TpiSource *> deps;
181 std::vector<TpiSource *> objs;
182 for (TpiSource *s : instances)
183 (s->isDependency() ? deps : objs).push_back(s);
184 uint32_t numDeps = deps.size();
185 uint32_t numObjs = objs.size();
186 instances = std::move(deps);
187 instances.insert(instances.end(), objs.begin(), objs.end());
188 for (uint32_t i = 0, e = instances.size(); i < e; ++i)
189 instances[i]->tpiSrcIdx = i;
190 dependencySources = makeArrayRef(instances.data(), numDeps);
191 objectSources = makeArrayRef(instances.data() + numDeps, numObjs);
192 }
193
makeTpiSource(ObjFile * file)194 TpiSource *lld::coff::makeTpiSource(ObjFile *file) {
195 return make<TpiSource>(TpiSource::Regular, file);
196 }
197
makeTypeServerSource(PDBInputFile * pdbInputFile)198 TpiSource *lld::coff::makeTypeServerSource(PDBInputFile *pdbInputFile) {
199 // Type server sources come in pairs: the TPI stream, and the IPI stream.
200 auto *tpiSource = make<TypeServerSource>(pdbInputFile);
201 if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
202 tpiSource->ipiSrc = make<TypeServerIpiSource>();
203 return tpiSource;
204 }
205
makeUseTypeServerSource(ObjFile * file,TypeServer2Record ts)206 TpiSource *lld::coff::makeUseTypeServerSource(ObjFile *file,
207 TypeServer2Record ts) {
208 return make<UseTypeServerSource>(file, ts);
209 }
210
makePrecompSource(ObjFile * file)211 TpiSource *lld::coff::makePrecompSource(ObjFile *file) {
212 return make<PrecompSource>(file);
213 }
214
makeUsePrecompSource(ObjFile * file,PrecompRecord precomp)215 TpiSource *lld::coff::makeUsePrecompSource(ObjFile *file,
216 PrecompRecord precomp) {
217 return make<UsePrecompSource>(file, precomp);
218 }
219
220 std::map<codeview::GUID, TypeServerSource *> TypeServerSource::mappings;
221
222 std::map<uint32_t, PrecompSource *> PrecompSource::mappings;
223
remapTypeIndex(TypeIndex & ti,TiRefKind refKind) const224 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
225 if (ti.isSimple())
226 return true;
227
228 // This can be an item index or a type index. Choose the appropriate map.
229 ArrayRef<TypeIndex> tpiOrIpiMap =
230 (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
231 if (ti.toArrayIndex() >= tpiOrIpiMap.size())
232 return false;
233 ti = tpiOrIpiMap[ti.toArrayIndex()];
234 return true;
235 }
236
remapRecord(MutableArrayRef<uint8_t> rec,ArrayRef<TiReference> typeRefs)237 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
238 ArrayRef<TiReference> typeRefs) {
239 MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
240 for (const TiReference &ref : typeRefs) {
241 unsigned byteSize = ref.Count * sizeof(TypeIndex);
242 if (contents.size() < ref.Offset + byteSize)
243 fatal("symbol record too short");
244
245 MutableArrayRef<TypeIndex> indices(
246 reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
247 for (TypeIndex &ti : indices) {
248 if (!remapTypeIndex(ti, ref.Kind)) {
249 if (config->verbose) {
250 uint16_t kind =
251 reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
252 StringRef fname = file ? file->getName() : "<unknown PDB>";
253 log("failed to remap type index in record of kind 0x" +
254 utohexstr(kind) + " in " + fname + " with bad " +
255 (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
256 " index 0x" + utohexstr(ti.getIndex()));
257 }
258 ti = TypeIndex(SimpleTypeKind::NotTranslated);
259 continue;
260 }
261 }
262 }
263 }
264
remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec)265 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
266 // TODO: Handle errors similar to symbols.
267 SmallVector<TiReference, 32> typeRefs;
268 discoverTypeIndices(CVType(rec), typeRefs);
269 remapRecord(rec, typeRefs);
270 }
271
remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec)272 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
273 // Discover type index references in the record. Skip it if we don't
274 // know where they are.
275 SmallVector<TiReference, 32> typeRefs;
276 if (!discoverTypeIndicesInSymbol(rec, typeRefs))
277 return false;
278 remapRecord(rec, typeRefs);
279 return true;
280 }
281
282 // A COFF .debug$H section is currently a clang extension. This function checks
283 // if a .debug$H section is in a format that we expect / understand, so that we
284 // can ignore any sections which are coincidentally also named .debug$H but do
285 // not contain a format we recognize.
canUseDebugH(ArrayRef<uint8_t> debugH)286 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
287 if (debugH.size() < sizeof(object::debug_h_header))
288 return false;
289 auto *header =
290 reinterpret_cast<const object::debug_h_header *>(debugH.data());
291 debugH = debugH.drop_front(sizeof(object::debug_h_header));
292 return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
293 header->Version == 0 &&
294 header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1_8) &&
295 (debugH.size() % 8 == 0);
296 }
297
getDebugH(ObjFile * file)298 static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
299 SectionChunk *sec =
300 SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
301 if (!sec)
302 return llvm::None;
303 ArrayRef<uint8_t> contents = sec->getContents();
304 if (!canUseDebugH(contents))
305 return None;
306 return contents;
307 }
308
309 static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH)310 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
311 assert(canUseDebugH(debugH));
312 debugH = debugH.drop_front(sizeof(object::debug_h_header));
313 uint32_t count = debugH.size() / sizeof(GloballyHashedType);
314 return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
315 }
316
317 // Merge .debug$T for a generic object file.
mergeDebugT(TypeMerger * m)318 Error TpiSource::mergeDebugT(TypeMerger *m) {
319 assert(!config->debugGHashes &&
320 "use remapTpiWithGHashes when ghash is enabled");
321
322 CVTypeArray types;
323 BinaryStreamReader reader(file->debugTypes, support::little);
324 cantFail(reader.readArray(types, reader.getLength()));
325
326 // When dealing with PCH.OBJ, some indices were already merged.
327 unsigned nbHeadIndices = indexMapStorage.size();
328
329 if (auto err = mergeTypeAndIdRecords(
330 m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature))
331 fatal("codeview::mergeTypeAndIdRecords failed: " +
332 toString(std::move(err)));
333
334 // In an object, there is only one mapping for both types and items.
335 tpiMap = indexMapStorage;
336 ipiMap = indexMapStorage;
337
338 if (config->showSummary) {
339 nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
340 nbTypeRecordsBytes = reader.getLength();
341 // Count how many times we saw each type record in our input. This
342 // calculation requires a second pass over the type records to classify each
343 // record as a type or index. This is slow, but this code executes when
344 // collecting statistics.
345 m->tpiCounts.resize(m->getTypeTable().size());
346 m->ipiCounts.resize(m->getIDTable().size());
347 uint32_t srcIdx = nbHeadIndices;
348 for (CVType &ty : types) {
349 TypeIndex dstIdx = tpiMap[srcIdx++];
350 // Type merging may fail, so a complex source type may become the simple
351 // NotTranslated type, which cannot be used as an array index.
352 if (dstIdx.isSimple())
353 continue;
354 SmallVectorImpl<uint32_t> &counts =
355 isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
356 ++counts[dstIdx.toArrayIndex()];
357 }
358 }
359
360 return Error::success();
361 }
362
363 // Merge types from a type server PDB.
mergeDebugT(TypeMerger * m)364 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
365 assert(!config->debugGHashes &&
366 "use remapTpiWithGHashes when ghash is enabled");
367
368 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
369 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
370 if (auto e = expectedTpi.takeError())
371 fatal("Type server does not have TPI stream: " + toString(std::move(e)));
372 pdb::TpiStream *maybeIpi = nullptr;
373 if (pdbFile.hasPDBIpiStream()) {
374 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
375 if (auto e = expectedIpi.takeError())
376 fatal("Error getting type server IPI stream: " + toString(std::move(e)));
377 maybeIpi = &*expectedIpi;
378 }
379
380 // Merge TPI first, because the IPI stream will reference type indices.
381 if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
382 expectedTpi->typeArray()))
383 fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
384 tpiMap = indexMapStorage;
385
386 // Merge IPI.
387 if (maybeIpi) {
388 if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
389 maybeIpi->typeArray()))
390 fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
391 ipiMap = ipiSrc->indexMapStorage;
392 }
393
394 if (config->showSummary) {
395 nbTypeRecords = tpiMap.size() + ipiMap.size();
396 nbTypeRecordsBytes =
397 expectedTpi->typeArray().getUnderlyingStream().getLength() +
398 (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
399 : 0);
400
401 // Count how many times we saw each type record in our input. If a
402 // destination type index is present in the source to destination type index
403 // map, that means we saw it once in the input. Add it to our histogram.
404 m->tpiCounts.resize(m->getTypeTable().size());
405 m->ipiCounts.resize(m->getIDTable().size());
406 for (TypeIndex ti : tpiMap)
407 if (!ti.isSimple())
408 ++m->tpiCounts[ti.toArrayIndex()];
409 for (TypeIndex ti : ipiMap)
410 if (!ti.isSimple())
411 ++m->ipiCounts[ti.toArrayIndex()];
412 }
413
414 return Error::success();
415 }
416
getTypeServerSource()417 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
418 const codeview::GUID &tsId = typeServerDependency.getGuid();
419 StringRef tsPath = typeServerDependency.getName();
420
421 TypeServerSource *tsSrc;
422 auto it = TypeServerSource::mappings.find(tsId);
423 if (it != TypeServerSource::mappings.end()) {
424 tsSrc = it->second;
425 } else {
426 // The file failed to load, lookup by name
427 PDBInputFile *pdb = PDBInputFile::findFromRecordPath(tsPath, file);
428 if (!pdb)
429 return createFileError(tsPath, errorCodeToError(std::error_code(
430 ENOENT, std::generic_category())));
431 // If an error occurred during loading, throw it now
432 if (pdb->loadErr && *pdb->loadErr)
433 return createFileError(tsPath, std::move(*pdb->loadErr));
434
435 tsSrc = (TypeServerSource *)pdb->debugTypesObj;
436
437 // Just because a file with a matching name was found and it was an actual
438 // PDB file doesn't mean it matches. For it to match the InfoStream's GUID
439 // must match the GUID specified in the TypeServer2 record.
440 if (tsSrc->Guid != tsId) {
441 return createFileError(tsPath,
442 make_error<pdb::PDBError>(
443 pdb::pdb_error_code::signature_out_of_date));
444 }
445 }
446 return tsSrc;
447 }
448
mergeDebugT(TypeMerger * m)449 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
450 Expected<TypeServerSource *> tsSrc = getTypeServerSource();
451 if (!tsSrc)
452 return tsSrc.takeError();
453
454 pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
455 auto expectedInfo = pdbSession.getPDBInfoStream();
456 if (!expectedInfo)
457 return expectedInfo.takeError();
458
459 // Reuse the type index map of the type server.
460 tpiMap = (*tsSrc)->tpiMap;
461 ipiMap = (*tsSrc)->ipiMap;
462 return Error::success();
463 }
464
equalsPath(StringRef path1,StringRef path2)465 static bool equalsPath(StringRef path1, StringRef path2) {
466 #if defined(_WIN32)
467 return path1.equals_insensitive(path2);
468 #else
469 return path1.equals(path2);
470 #endif
471 }
472
473 // Find by name an OBJ provided on the command line
findObjByName(StringRef fileNameOnly)474 static PrecompSource *findObjByName(StringRef fileNameOnly) {
475 SmallString<128> currentPath;
476 for (auto kv : PrecompSource::mappings) {
477 StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
478 sys::path::Style::windows);
479
480 // Compare based solely on the file name (link.exe behavior)
481 if (equalsPath(currentFileName, fileNameOnly))
482 return kv.second;
483 }
484 return nullptr;
485 }
486
findPrecompSource(ObjFile * file,PrecompRecord & pr)487 static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) {
488 // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
489 // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
490 // the paths embedded in the OBJs are in the Windows format.
491 SmallString<128> prFileName =
492 sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
493
494 auto it = PrecompSource::mappings.find(pr.getSignature());
495 if (it != PrecompSource::mappings.end()) {
496 return it->second;
497 }
498 // Lookup by name
499 return findObjByName(prFileName);
500 }
501
findPrecompMap(ObjFile * file,PrecompRecord & pr)502 static Expected<PrecompSource *> findPrecompMap(ObjFile *file,
503 PrecompRecord &pr) {
504 PrecompSource *precomp = findPrecompSource(file, pr);
505
506 if (!precomp)
507 return createFileError(
508 pr.getPrecompFilePath(),
509 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
510
511 if (pr.getSignature() != file->pchSignature)
512 return createFileError(
513 toString(file),
514 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
515
516 if (pr.getSignature() != *precomp->file->pchSignature)
517 return createFileError(
518 toString(precomp->file),
519 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
520
521 return precomp;
522 }
523
524 /// Merges a precompiled headers TPI map into the current TPI map. The
525 /// precompiled headers object will also be loaded and remapped in the
526 /// process.
mergeInPrecompHeaderObj()527 Error UsePrecompSource::mergeInPrecompHeaderObj() {
528 auto e = findPrecompMap(file, precompDependency);
529 if (!e)
530 return e.takeError();
531
532 PrecompSource *precompSrc = *e;
533 if (precompSrc->tpiMap.empty())
534 return Error::success();
535
536 assert(precompDependency.getStartTypeIndex() ==
537 TypeIndex::FirstNonSimpleIndex);
538 assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
539 // Use the previously remapped index map from the precompiled headers.
540 indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
541 precompSrc->tpiMap.begin() +
542 precompDependency.getTypesCount());
543
544 return Error::success();
545 }
546
mergeDebugT(TypeMerger * m)547 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
548 // This object was compiled with /Yu, so process the corresponding
549 // precompiled headers object (/Yc) first. Some type indices in the current
550 // object are referencing data in the precompiled headers object, so we need
551 // both to be loaded.
552 if (Error e = mergeInPrecompHeaderObj())
553 return e;
554
555 return TpiSource::mergeDebugT(m);
556 }
557
countTypeServerPDBs()558 uint32_t TpiSource::countTypeServerPDBs() {
559 return TypeServerSource::mappings.size();
560 }
561
countPrecompObjs()562 uint32_t TpiSource::countPrecompObjs() {
563 return PrecompSource::mappings.size();
564 }
565
clear()566 void TpiSource::clear() {
567 // Clean up any owned ghash allocations.
568 clearGHashes();
569 TpiSource::instances.clear();
570 TypeServerSource::mappings.clear();
571 PrecompSource::mappings.clear();
572 }
573
574 //===----------------------------------------------------------------------===//
575 // Parellel GHash type merging implementation.
576 //===----------------------------------------------------------------------===//
577
loadGHashes()578 void TpiSource::loadGHashes() {
579 if (Optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
580 ghashes = getHashesFromDebugH(*debugH);
581 ownedGHashes = false;
582 } else {
583 CVTypeArray types;
584 BinaryStreamReader reader(file->debugTypes, support::little);
585 cantFail(reader.readArray(types, reader.getLength()));
586 assignGHashesFromVector(GloballyHashedType::hashTypes(types));
587 }
588
589 fillIsItemIndexFromDebugT();
590 }
591
592 // Copies ghashes from a vector into an array. These are long lived, so it's
593 // worth the time to copy these into an appropriately sized vector to reduce
594 // memory usage.
assignGHashesFromVector(std::vector<GloballyHashedType> && hashVec)595 void TpiSource::assignGHashesFromVector(
596 std::vector<GloballyHashedType> &&hashVec) {
597 if (hashVec.empty())
598 return;
599 GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
600 memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
601 ghashes = makeArrayRef(hashes, hashVec.size());
602 ownedGHashes = true;
603 }
604
605 // Faster way to iterate type records. forEachTypeChecked is faster than
606 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
forEachTypeChecked(ArrayRef<uint8_t> types,function_ref<void (const CVType &)> fn)607 static void forEachTypeChecked(ArrayRef<uint8_t> types,
608 function_ref<void(const CVType &)> fn) {
609 checkError(
610 forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
611 fn(ty);
612 return Error::success();
613 }));
614 }
615
616 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
617 // TODO: Store this information in .debug$H so that we don't have to recompute
618 // it. This is the main bottleneck slowing down parallel ghashing with one
619 // thread over single-threaded ghashing.
fillIsItemIndexFromDebugT()620 void TpiSource::fillIsItemIndexFromDebugT() {
621 uint32_t index = 0;
622 isItemIndex.resize(ghashes.size());
623 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
624 if (isIdRecord(ty.kind()))
625 isItemIndex.set(index);
626 ++index;
627 });
628 }
629
mergeTypeRecord(TypeIndex curIndex,CVType ty)630 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
631 // Decide if the merged type goes into TPI or IPI.
632 bool isItem = isIdRecord(ty.kind());
633 MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
634
635 // Copy the type into our mutable buffer.
636 assert(ty.length() <= codeview::MaxRecordLength);
637 size_t offset = merged.recs.size();
638 size_t newSize = alignTo(ty.length(), 4);
639 merged.recs.resize(offset + newSize);
640 auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize);
641 memcpy(newRec.data(), ty.data().data(), newSize);
642
643 // Fix up the record prefix and padding bytes if it required resizing.
644 if (newSize != ty.length()) {
645 reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
646 for (size_t i = ty.length(); i < newSize; ++i)
647 newRec[i] = LF_PAD0 + (newSize - i);
648 }
649
650 // Remap the type indices in the new record.
651 remapTypesInTypeRecord(newRec);
652 uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
653 merged.recSizes.push_back(static_cast<uint16_t>(newSize));
654 merged.recHashes.push_back(pdbHash);
655
656 // Retain a mapping from PDB function id to PDB function type. This mapping is
657 // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
658 // symbols.
659 if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
660 bool success = ty.length() >= 12;
661 TypeIndex funcId = curIndex;
662 if (success)
663 success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
664 TypeIndex funcType =
665 *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
666 if (success) {
667 funcIdToType.push_back({funcId, funcType});
668 } else {
669 StringRef fname = file ? file->getName() : "<unknown PDB>";
670 warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
671 " in " + fname);
672 }
673 }
674 }
675
mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,TypeIndex beginIndex)676 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
677 TypeIndex beginIndex) {
678 // Re-sort the list of unique types by index.
679 if (kind == PDB)
680 assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end()));
681 else
682 llvm::sort(uniqueTypes);
683
684 // Accumulate all the unique types into one buffer in mergedTypes.
685 uint32_t ghashIndex = 0;
686 auto nextUniqueIndex = uniqueTypes.begin();
687 assert(mergedTpi.recs.empty());
688 assert(mergedIpi.recs.empty());
689
690 // Pre-compute the number of elements in advance to avoid std::vector resizes.
691 unsigned nbTpiRecs = 0;
692 unsigned nbIpiRecs = 0;
693 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
694 if (nextUniqueIndex != uniqueTypes.end() &&
695 *nextUniqueIndex == ghashIndex) {
696 assert(ty.length() <= codeview::MaxRecordLength);
697 size_t newSize = alignTo(ty.length(), 4);
698 (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
699 ++nextUniqueIndex;
700 }
701 ++ghashIndex;
702 });
703 mergedTpi.recs.reserve(nbTpiRecs);
704 mergedIpi.recs.reserve(nbIpiRecs);
705
706 // Do the actual type merge.
707 ghashIndex = 0;
708 nextUniqueIndex = uniqueTypes.begin();
709 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
710 if (nextUniqueIndex != uniqueTypes.end() &&
711 *nextUniqueIndex == ghashIndex) {
712 mergeTypeRecord(beginIndex + ghashIndex, ty);
713 ++nextUniqueIndex;
714 }
715 ++ghashIndex;
716 });
717 assert(nextUniqueIndex == uniqueTypes.end() &&
718 "failed to merge all desired records");
719 assert(uniqueTypes.size() ==
720 mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
721 "missing desired record");
722 }
723
remapTpiWithGHashes(GHashState * g)724 void TpiSource::remapTpiWithGHashes(GHashState *g) {
725 assert(config->debugGHashes && "ghashes must be enabled");
726 fillMapFromGHashes(g);
727 tpiMap = indexMapStorage;
728 ipiMap = indexMapStorage;
729 mergeUniqueTypeRecords(file->debugTypes);
730 // TODO: Free all unneeded ghash resources now that we have a full index map.
731
732 if (config->showSummary) {
733 nbTypeRecords = ghashes.size();
734 nbTypeRecordsBytes = file->debugTypes.size();
735 }
736 }
737
738 // PDBs do not actually store global hashes, so when merging a type server
739 // PDB we have to synthesize global hashes. To do this, we first synthesize
740 // global hashes for the TPI stream, since it is independent, then we
741 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
742 // as inputs.
loadGHashes()743 void TypeServerSource::loadGHashes() {
744 // Don't hash twice.
745 if (!ghashes.empty())
746 return;
747 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
748
749 // Hash TPI stream.
750 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
751 if (auto e = expectedTpi.takeError())
752 fatal("Type server does not have TPI stream: " + toString(std::move(e)));
753 assignGHashesFromVector(
754 GloballyHashedType::hashTypes(expectedTpi->typeArray()));
755 isItemIndex.resize(ghashes.size());
756
757 // Hash IPI stream, which depends on TPI ghashes.
758 if (!pdbFile.hasPDBIpiStream())
759 return;
760 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
761 if (auto e = expectedIpi.takeError())
762 fatal("error retrieving IPI stream: " + toString(std::move(e)));
763 ipiSrc->assignGHashesFromVector(
764 GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
765
766 // The IPI stream isItemIndex bitvector should be all ones.
767 ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
768 ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
769 }
770
771 // Flatten discontiguous PDB type arrays to bytes so that we can use
772 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
773 // type servers is faster than iterating all object files compiled with /Z7 with
774 // CVTypeArray, which has high overheads due to the virtual interface of
775 // BinaryStream::readBytes.
typeArrayToBytes(const CVTypeArray & types)776 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
777 BinaryStreamRef stream = types.getUnderlyingStream();
778 ArrayRef<uint8_t> debugTypes;
779 checkError(stream.readBytes(0, stream.getLength(), debugTypes));
780 return debugTypes;
781 }
782
783 // Merge types from a type server PDB.
remapTpiWithGHashes(GHashState * g)784 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
785 assert(config->debugGHashes && "ghashes must be enabled");
786
787 // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
788 // propagate errors, those should've been handled during ghash loading.
789 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
790 pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
791 fillMapFromGHashes(g);
792 tpiMap = indexMapStorage;
793 mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
794 if (pdbFile.hasPDBIpiStream()) {
795 pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
796 ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
797 ipiSrc->fillMapFromGHashes(g);
798 ipiMap = ipiSrc->indexMapStorage;
799 ipiSrc->tpiMap = tpiMap;
800 ipiSrc->ipiMap = ipiMap;
801 ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
802
803 if (config->showSummary) {
804 nbTypeRecords = ipiSrc->ghashes.size();
805 nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
806 }
807 }
808
809 if (config->showSummary) {
810 nbTypeRecords += ghashes.size();
811 nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
812 }
813 }
814
remapTpiWithGHashes(GHashState * g)815 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
816 // No remapping to do with /Zi objects. Simply use the index map from the type
817 // server. Errors should have been reported earlier. Symbols from this object
818 // will be ignored.
819 Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
820 if (!maybeTsSrc) {
821 typeMergingError =
822 joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
823 return;
824 }
825 TypeServerSource *tsSrc = *maybeTsSrc;
826 tpiMap = tsSrc->tpiMap;
827 ipiMap = tsSrc->ipiMap;
828 }
829
loadGHashes()830 void PrecompSource::loadGHashes() {
831 if (getDebugH(file)) {
832 warn("ignoring .debug$H section; pch with ghash is not implemented");
833 }
834
835 uint32_t ghashIdx = 0;
836 std::vector<GloballyHashedType> hashVec;
837 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
838 // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
839 // the PDB. There must be an entry in the list of ghashes so that the type
840 // indexes of the following records in the /Yc PCH object line up.
841 if (ty.kind() == LF_ENDPRECOMP)
842 endPrecompGHashIdx = ghashIdx;
843
844 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
845 isItemIndex.push_back(isIdRecord(ty.kind()));
846 ++ghashIdx;
847 });
848 assignGHashesFromVector(std::move(hashVec));
849 }
850
loadGHashes()851 void UsePrecompSource::loadGHashes() {
852 PrecompSource *pchSrc = findPrecompSource(file, precompDependency);
853 if (!pchSrc)
854 return;
855
856 // To compute ghashes of a /Yu object file, we need to build on the the
857 // ghashes of the /Yc PCH object. After we are done hashing, discard the
858 // ghashes from the PCH source so we don't unnecessarily try to deduplicate
859 // them.
860 std::vector<GloballyHashedType> hashVec =
861 pchSrc->ghashes.take_front(precompDependency.getTypesCount());
862 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
863 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
864 isItemIndex.push_back(isIdRecord(ty.kind()));
865 });
866 hashVec.erase(hashVec.begin(),
867 hashVec.begin() + precompDependency.getTypesCount());
868 assignGHashesFromVector(std::move(hashVec));
869 }
870
remapTpiWithGHashes(GHashState * g)871 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
872 fillMapFromGHashes(g);
873 // This object was compiled with /Yu, so process the corresponding
874 // precompiled headers object (/Yc) first. Some type indices in the current
875 // object are referencing data in the precompiled headers object, so we need
876 // both to be loaded.
877 if (Error e = mergeInPrecompHeaderObj()) {
878 typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
879 return;
880 }
881
882 tpiMap = indexMapStorage;
883 ipiMap = indexMapStorage;
884 mergeUniqueTypeRecords(file->debugTypes,
885 TypeIndex(precompDependency.getStartTypeIndex() +
886 precompDependency.getTypesCount()));
887 if (config->showSummary) {
888 nbTypeRecords = ghashes.size();
889 nbTypeRecordsBytes = file->debugTypes.size();
890 }
891 }
892
893 namespace {
894 /// A concurrent hash table for global type hashing. It is based on this paper:
895 /// Concurrent Hash Tables: Fast and General(?)!
896 /// https://dl.acm.org/doi/10.1145/3309206
897 ///
898 /// This hash table is meant to be used in two phases:
899 /// 1. concurrent insertions
900 /// 2. concurrent reads
901 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
902 ///
903 /// The paper describes storing a key-value pair in two machine words.
904 /// Generally, the values stored in this map are type indices, and we can use
905 /// those values to recover the ghash key from a side table. This allows us to
906 /// shrink the table entries further at the cost of some loads, and sidesteps
907 /// the need for a 128 bit atomic compare-and-swap operation.
908 ///
909 /// During insertion, a priority function is used to decide which insertion
910 /// should be preferred. This ensures that the output is deterministic. For
911 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
912 ///
913 class GHashCell;
914 struct GHashTable {
915 GHashCell *table = nullptr;
916 uint32_t tableSize = 0;
917
918 GHashTable() = default;
919 ~GHashTable();
920
921 /// Initialize the table with the given size. Because the table cannot be
922 /// resized, the initial size of the table must be large enough to contain all
923 /// inputs, or insertion may not be able to find an empty cell.
924 void init(uint32_t newTableSize);
925
926 /// Insert the cell with the given ghash into the table. Return the insertion
927 /// position in the table. It is safe for the caller to store the insertion
928 /// position because the table cannot be resized.
929 uint32_t insert(GloballyHashedType ghash, GHashCell newCell);
930 };
931
932 /// A ghash table cell for deduplicating types from TpiSources.
933 class GHashCell {
934 uint64_t data = 0;
935
936 public:
937 GHashCell() = default;
938
939 // Construct data most to least significant so that sorting works well:
940 // - isItem
941 // - tpiSrcIdx
942 // - ghashIdx
943 // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
944 // non-zero representation.
GHashCell(bool isItem,uint32_t tpiSrcIdx,uint32_t ghashIdx)945 GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
946 : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
947 ghashIdx) {
948 assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
949 assert(ghashIdx == getGHashIdx() && "round trip failure");
950 }
951
GHashCell(uint64_t data)952 explicit GHashCell(uint64_t data) : data(data) {}
953
954 // The empty cell is all zeros.
isEmpty() const955 bool isEmpty() const { return data == 0ULL; }
956
957 /// Extract the tpiSrcIdx.
getTpiSrcIdx() const958 uint32_t getTpiSrcIdx() const {
959 return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
960 }
961
962 /// Extract the index into the ghash array of the TpiSource.
getGHashIdx() const963 uint32_t getGHashIdx() const { return (uint32_t)data; }
964
isItem() const965 bool isItem() const { return data & (1ULL << 63U); }
966
967 /// Get the ghash key for this cell.
getGHash() const968 GloballyHashedType getGHash() const {
969 return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()];
970 }
971
972 /// The priority function for the cell. The data is stored such that lower
973 /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
974 /// from earlier sources are more likely to prevail.
operator <(const GHashCell & l,const GHashCell & r)975 friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
976 return l.data < r.data;
977 }
978 };
979 } // namespace
980
981 namespace lld {
982 namespace coff {
983 /// This type is just a wrapper around GHashTable with external linkage so it
984 /// can be used from a header.
985 struct GHashState {
986 GHashTable table;
987 };
988 } // namespace coff
989 } // namespace lld
990
~GHashTable()991 GHashTable::~GHashTable() { delete[] table; }
992
init(uint32_t newTableSize)993 void GHashTable::init(uint32_t newTableSize) {
994 table = new GHashCell[newTableSize];
995 memset(table, 0, newTableSize * sizeof(GHashCell));
996 tableSize = newTableSize;
997 }
998
insert(GloballyHashedType ghash,GHashCell newCell)999 uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) {
1000 assert(!newCell.isEmpty() && "cannot insert empty cell value");
1001
1002 // FIXME: The low bytes of SHA1 have low entropy for short records, which
1003 // type records are. Swap the byte order for better entropy. A better ghash
1004 // won't need this.
1005 uint32_t startIdx =
1006 ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
1007
1008 // Do a linear probe starting at startIdx.
1009 uint32_t idx = startIdx;
1010 while (true) {
1011 // Run a compare and swap loop. There are four cases:
1012 // - cell is empty: CAS into place and return
1013 // - cell has matching key, earlier priority: do nothing, return
1014 // - cell has matching key, later priority: CAS into place and return
1015 // - cell has non-matching key: hash collision, probe next cell
1016 auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
1017 GHashCell oldCell(cellPtr->load());
1018 while (oldCell.isEmpty() || oldCell.getGHash() == ghash) {
1019 // Check if there is an existing ghash entry with a higher priority
1020 // (earlier ordering). If so, this is a duplicate, we are done.
1021 if (!oldCell.isEmpty() && oldCell < newCell)
1022 return idx;
1023 // Either the cell is empty, or our value is higher priority. Try to
1024 // compare and swap. If it succeeds, we are done.
1025 if (cellPtr->compare_exchange_weak(oldCell, newCell))
1026 return idx;
1027 // If the CAS failed, check this cell again.
1028 }
1029
1030 // Advance the probe. Wrap around to the beginning if we run off the end.
1031 ++idx;
1032 idx = idx == tableSize ? 0 : idx;
1033 if (idx == startIdx) {
1034 // If this becomes an issue, we could mark failure and rehash from the
1035 // beginning with a bigger table. There is no difference between rehashing
1036 // internally and starting over.
1037 report_fatal_error("ghash table is full");
1038 }
1039 }
1040 llvm_unreachable("left infloop");
1041 }
1042
TypeMerger(llvm::BumpPtrAllocator & alloc)1043 TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc)
1044 : typeTable(alloc), idTable(alloc) {}
1045
1046 TypeMerger::~TypeMerger() = default;
1047
mergeTypesWithGHash()1048 void TypeMerger::mergeTypesWithGHash() {
1049 // Load ghashes. Do type servers and PCH objects first.
1050 {
1051 ScopedTimer t1(loadGHashTimer);
1052 parallelForEach(TpiSource::dependencySources,
1053 [&](TpiSource *source) { source->loadGHashes(); });
1054 parallelForEach(TpiSource::objectSources,
1055 [&](TpiSource *source) { source->loadGHashes(); });
1056 }
1057
1058 ScopedTimer t2(mergeGHashTimer);
1059 GHashState ghashState;
1060
1061 // Estimate the size of hash table needed to deduplicate ghashes. This *must*
1062 // be larger than the number of unique types, or hash table insertion may not
1063 // be able to find a vacant slot. Summing the input types guarantees this, but
1064 // it is a gross overestimate. The table size could be reduced to save memory,
1065 // but it would require implementing rehashing, and this table is generally
1066 // small compared to total memory usage, at eight bytes per input type record,
1067 // and most input type records are larger than eight bytes.
1068 size_t tableSize = 0;
1069 for (TpiSource *source : TpiSource::instances)
1070 tableSize += source->ghashes.size();
1071
1072 // Cap the table size so that we can use 32-bit cell indices. Type indices are
1073 // also 32-bit, so this is an inherent PDB file format limit anyway.
1074 tableSize =
1075 std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1076 ghashState.table.init(static_cast<uint32_t>(tableSize));
1077
1078 // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1079 // the contents of the hash table cell, but we can remember the insertion
1080 // position. Because the table does not rehash, the position will not change
1081 // under insertion. After insertion is done, the value of the cell can be read
1082 // to retrieve the final PDB type index.
1083 parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) {
1084 TpiSource *source = TpiSource::instances[tpiSrcIdx];
1085 source->indexMapStorage.resize(source->ghashes.size());
1086 for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1087 if (source->shouldOmitFromPdb(i)) {
1088 source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1089 continue;
1090 }
1091 GloballyHashedType ghash = source->ghashes[i];
1092 bool isItem = source->isItemIndex.test(i);
1093 uint32_t cellIdx =
1094 ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i));
1095
1096 // Store the ghash cell index as a type index in indexMapStorage. Later
1097 // we will replace it with the PDB type index.
1098 source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1099 }
1100 });
1101
1102 // Collect all non-empty cells and sort them. This will implicitly assign
1103 // destination type indices, and partition the entries into type records and
1104 // item records. It arranges types in this order:
1105 // - type records
1106 // - source 0, type 0...
1107 // - source 1, type 1...
1108 // - item records
1109 // - source 0, type 1...
1110 // - source 1, type 0...
1111 std::vector<GHashCell> entries;
1112 for (const GHashCell &cell :
1113 makeArrayRef(ghashState.table.table, tableSize)) {
1114 if (!cell.isEmpty())
1115 entries.push_back(cell);
1116 }
1117 parallelSort(entries, std::less<GHashCell>());
1118 log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1119 tableSize ? double(entries.size()) / tableSize : 0,
1120 entries.size(), tableSize));
1121
1122 // Find out how many type and item indices there are.
1123 auto mid =
1124 std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0));
1125 assert((mid == entries.end() || mid->isItem()) &&
1126 (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1127 "midpoint is not midpoint");
1128 uint32_t numTypes = std::distance(entries.begin(), mid);
1129 uint32_t numItems = std::distance(mid, entries.end());
1130 log("Tpi record count: " + Twine(numTypes));
1131 log("Ipi record count: " + Twine(numItems));
1132
1133 // Make a list of the "unique" type records to merge for each tpi source. Type
1134 // merging will skip indices not on this list. Store the destination PDB type
1135 // index for these unique types in the tpiMap for each source. The entries for
1136 // non-unique types will be filled in prior to type merging.
1137 for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1138 auto &cell = entries[i];
1139 uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1140 TpiSource *source = TpiSource::instances[tpiSrcIdx];
1141 source->uniqueTypes.push_back(cell.getGHashIdx());
1142
1143 // Update the ghash table to store the destination PDB type index in the
1144 // table.
1145 uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1146 uint32_t ghashCellIndex =
1147 source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1148 ghashState.table.table[ghashCellIndex] =
1149 GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1150 }
1151
1152 // In parallel, remap all types.
1153 for_each(TpiSource::dependencySources, [&](TpiSource *source) {
1154 source->remapTpiWithGHashes(&ghashState);
1155 });
1156 parallelForEach(TpiSource::objectSources, [&](TpiSource *source) {
1157 source->remapTpiWithGHashes(&ghashState);
1158 });
1159
1160 // Build a global map of from function ID to function type.
1161 for (TpiSource *source : TpiSource::instances) {
1162 for (auto idToType : source->funcIdToType)
1163 funcIdToType.insert(idToType);
1164 source->funcIdToType.clear();
1165 }
1166
1167 TpiSource::clearGHashes();
1168 }
1169
1170 /// Given the index into the ghash table for a particular type, return the type
1171 /// index for that type in the output PDB.
loadPdbTypeIndexFromCell(GHashState * g,uint32_t ghashCellIdx)1172 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1173 uint32_t ghashCellIdx) {
1174 GHashCell cell = g->table.table[ghashCellIdx];
1175 return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1176 }
1177
1178 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1179 // ghash to lookup its final type index in the PDB, and store that in the map.
fillMapFromGHashes(GHashState * g)1180 void TpiSource::fillMapFromGHashes(GHashState *g) {
1181 for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1182 TypeIndex fakeCellIndex = indexMapStorage[i];
1183 if (fakeCellIndex.isSimple())
1184 indexMapStorage[i] = fakeCellIndex;
1185 else
1186 indexMapStorage[i] =
1187 loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1188 }
1189 }
1190
clearGHashes()1191 void TpiSource::clearGHashes() {
1192 for (TpiSource *src : TpiSource::instances) {
1193 if (src->ownedGHashes)
1194 delete[] src->ghashes.data();
1195 src->ghashes = {};
1196 src->isItemIndex.clear();
1197 src->uniqueTypes.clear();
1198 }
1199 }
1200