1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DebugTypes.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Driver.h"
13 #include "InputFiles.h"
14 #include "PDB.h"
15 #include "TypeMerger.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
21 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
22 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
23 #include "llvm/DebugInfo/PDB/GenericError.h"
24 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
25 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
28 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
29 #include "llvm/Support/FormatVariadic.h"
30 #include "llvm/Support/Parallel.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/TimeProfiler.h"
33 
34 using namespace llvm;
35 using namespace llvm::codeview;
36 using namespace lld;
37 using namespace lld::coff;
38 
39 namespace {
40 class TypeServerIpiSource;
41 
42 // The TypeServerSource class represents a PDB type server, a file referenced by
43 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
44 // files, therefore there must be only once instance per OBJ lot. The file path
45 // is discovered from the dependent OBJ's debug type stream. The
46 // TypeServerSource object is then queued and loaded by the COFF Driver. The
47 // debug type stream for such PDB files will be merged first in the final PDB,
48 // before any dependent OBJ.
49 class TypeServerSource : public TpiSource {
50 public:
TypeServerSource(COFFLinkerContext & ctx,PDBInputFile * f)51   explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
52       : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
53     if (f->loadErrorStr)
54       return;
55     pdb::PDBFile &file = f->session->getPDBFile();
56     auto expectedInfo = file.getPDBInfoStream();
57     if (!expectedInfo)
58       return;
59     Guid = expectedInfo->getGuid();
60     auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
61     if (!it.second) {
62       // If we hit here we have collision on Guid's in two PDB files.
63       // This can happen if the PDB Guid is invalid or if we are really
64       // unlucky. This should fall back on stright file-system lookup.
65       it.first->second = nullptr;
66     }
67   }
68 
69   Error mergeDebugT(TypeMerger *m) override;
70 
71   void loadGHashes() override;
72   void remapTpiWithGHashes(GHashState *g) override;
73 
isDependency() const74   bool isDependency() const override { return true; }
75 
76   PDBInputFile *pdbInputFile = nullptr;
77 
78   // TpiSource for IPI stream.
79   TypeServerIpiSource *ipiSrc = nullptr;
80 
81   // The PDB signature GUID.
82   codeview::GUID Guid;
83 };
84 
85 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
86 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
87 // invariant of one type index space per source.
88 class TypeServerIpiSource : public TpiSource {
89 public:
TypeServerIpiSource(COFFLinkerContext & ctx)90   explicit TypeServerIpiSource(COFFLinkerContext &ctx)
91       : TpiSource(ctx, PDBIpi, nullptr) {}
92 
93   friend class TypeServerSource;
94 
95   // All of the TpiSource methods are no-ops. The parent TypeServerSource
96   // handles both TPI and IPI.
mergeDebugT(TypeMerger * m)97   Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
loadGHashes()98   void loadGHashes() override {}
remapTpiWithGHashes(GHashState * g)99   void remapTpiWithGHashes(GHashState *g) override {}
isDependency() const100   bool isDependency() const override { return true; }
101 };
102 
103 // This class represents the debug type stream of an OBJ file that depends on a
104 // PDB type server (see TypeServerSource).
105 class UseTypeServerSource : public TpiSource {
106   Expected<TypeServerSource *> getTypeServerSource();
107 
108 public:
UseTypeServerSource(COFFLinkerContext & ctx,ObjFile * f,TypeServer2Record ts)109   UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
110       : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
111 
112   Error mergeDebugT(TypeMerger *m) override;
113 
114   // No need to load ghashes from /Zi objects.
loadGHashes()115   void loadGHashes() override {}
116   void remapTpiWithGHashes(GHashState *g) override;
117 
118   // Information about the PDB type server dependency, that needs to be loaded
119   // in before merging this OBJ.
120   TypeServer2Record typeServerDependency;
121 };
122 
123 // This class represents the debug type stream of a Microsoft precompiled
124 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
125 // PDB, before any other OBJs that depend on this. Note that only MSVC generate
126 // such files, clang does not.
127 class PrecompSource : public TpiSource {
128 public:
PrecompSource(COFFLinkerContext & ctx,ObjFile * f)129   PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
130     // If the S_OBJNAME record contains the PCH signature, we'll register this
131     // source file right away.
132     registerMapping();
133   }
134 
135   Error mergeDebugT(TypeMerger *m) override;
136 
137   void loadGHashes() override;
138 
isDependency() const139   bool isDependency() const override { return true; }
140 
141 private:
142   void registerMapping();
143 
144   // Whether this precomp OBJ was recorded in the precompSourceMappings map.
145   // Only happens if the file->pchSignature is valid.
146   bool registered = false;
147 };
148 
149 // This class represents the debug type stream of an OBJ file that depends on a
150 // Microsoft precompiled headers OBJ (see PrecompSource).
151 class UsePrecompSource : public TpiSource {
152 public:
UsePrecompSource(COFFLinkerContext & ctx,ObjFile * f,PrecompRecord precomp)153   UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
154       : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
155 
156   Error mergeDebugT(TypeMerger *m) override;
157 
158   void loadGHashes() override;
159   void remapTpiWithGHashes(GHashState *g) override;
160 
161 private:
162   Error mergeInPrecompHeaderObj();
163 
164   PrecompSource *findObjByName(StringRef fileNameOnly);
165   PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
166   Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
167 
168 public:
169   // Information about the Precomp OBJ dependency, that needs to be loaded in
170   // before merging this OBJ.
171   PrecompRecord precompDependency;
172 };
173 } // namespace
174 
TpiSource(COFFLinkerContext & ctx,TpiKind k,ObjFile * f)175 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
176     : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
177   ctx.addTpiSource(this);
178 }
179 
180 // Vtable key method.
~TpiSource()181 TpiSource::~TpiSource() {
182   // Silence any assertions about unchecked errors.
183   consumeError(std::move(typeMergingError));
184 }
185 
makeTpiSource(COFFLinkerContext & ctx,ObjFile * file)186 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
187   return make<TpiSource>(ctx, TpiSource::Regular, file);
188 }
189 
makeTypeServerSource(COFFLinkerContext & ctx,PDBInputFile * pdbInputFile)190 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
191                                            PDBInputFile *pdbInputFile) {
192   // Type server sources come in pairs: the TPI stream, and the IPI stream.
193   auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
194   if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
195     tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
196   return tpiSource;
197 }
198 
makeUseTypeServerSource(COFFLinkerContext & ctx,ObjFile * file,TypeServer2Record ts)199 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
200                                               ObjFile *file,
201                                               TypeServer2Record ts) {
202   return make<UseTypeServerSource>(ctx, file, ts);
203 }
204 
makePrecompSource(COFFLinkerContext & ctx,ObjFile * file)205 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
206   return make<PrecompSource>(ctx, file);
207 }
208 
makeUsePrecompSource(COFFLinkerContext & ctx,ObjFile * file,PrecompRecord precomp)209 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
210                                            ObjFile *file,
211                                            PrecompRecord precomp) {
212   return make<UsePrecompSource>(ctx, file, precomp);
213 }
214 
remapTypeIndex(TypeIndex & ti,TiRefKind refKind) const215 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
216   if (ti.isSimple())
217     return true;
218 
219   // This can be an item index or a type index. Choose the appropriate map.
220   ArrayRef<TypeIndex> tpiOrIpiMap =
221       (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
222   if (ti.toArrayIndex() >= tpiOrIpiMap.size())
223     return false;
224   ti = tpiOrIpiMap[ti.toArrayIndex()];
225   return true;
226 }
227 
remapRecord(MutableArrayRef<uint8_t> rec,ArrayRef<TiReference> typeRefs)228 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
229                             ArrayRef<TiReference> typeRefs) {
230   MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
231   for (const TiReference &ref : typeRefs) {
232     unsigned byteSize = ref.Count * sizeof(TypeIndex);
233     if (contents.size() < ref.Offset + byteSize)
234       fatal("symbol record too short");
235 
236     MutableArrayRef<TypeIndex> indices(
237         reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
238     for (TypeIndex &ti : indices) {
239       if (!remapTypeIndex(ti, ref.Kind)) {
240         if (ctx.config.verbose) {
241           uint16_t kind =
242               reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
243           StringRef fname = file ? file->getName() : "<unknown PDB>";
244           log("failed to remap type index in record of kind 0x" +
245               utohexstr(kind) + " in " + fname + " with bad " +
246               (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
247               " index 0x" + utohexstr(ti.getIndex()));
248         }
249         ti = TypeIndex(SimpleTypeKind::NotTranslated);
250         continue;
251       }
252     }
253   }
254 }
255 
remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec)256 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
257   // TODO: Handle errors similar to symbols.
258   SmallVector<TiReference, 32> typeRefs;
259   discoverTypeIndices(CVType(rec), typeRefs);
260   remapRecord(rec, typeRefs);
261 }
262 
remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec)263 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
264   // Discover type index references in the record. Skip it if we don't
265   // know where they are.
266   SmallVector<TiReference, 32> typeRefs;
267   if (!discoverTypeIndicesInSymbol(rec, typeRefs))
268     return false;
269   remapRecord(rec, typeRefs);
270   return true;
271 }
272 
273 // A COFF .debug$H section is currently a clang extension.  This function checks
274 // if a .debug$H section is in a format that we expect / understand, so that we
275 // can ignore any sections which are coincidentally also named .debug$H but do
276 // not contain a format we recognize.
canUseDebugH(ArrayRef<uint8_t> debugH)277 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
278   if (debugH.size() < sizeof(object::debug_h_header))
279     return false;
280   auto *header =
281       reinterpret_cast<const object::debug_h_header *>(debugH.data());
282   debugH = debugH.drop_front(sizeof(object::debug_h_header));
283   return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
284          header->Version == 0 &&
285          header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
286          (debugH.size() % 8 == 0);
287 }
288 
getDebugH(ObjFile * file)289 static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
290   SectionChunk *sec =
291       SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
292   if (!sec)
293     return std::nullopt;
294   ArrayRef<uint8_t> contents = sec->getContents();
295   if (!canUseDebugH(contents))
296     return std::nullopt;
297   return contents;
298 }
299 
300 static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH)301 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
302   assert(canUseDebugH(debugH));
303   debugH = debugH.drop_front(sizeof(object::debug_h_header));
304   uint32_t count = debugH.size() / sizeof(GloballyHashedType);
305   return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
306 }
307 
308 // Merge .debug$T for a generic object file.
mergeDebugT(TypeMerger * m)309 Error TpiSource::mergeDebugT(TypeMerger *m) {
310   assert(!ctx.config.debugGHashes &&
311          "use remapTpiWithGHashes when ghash is enabled");
312 
313   CVTypeArray types;
314   BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
315   cantFail(reader.readArray(types, reader.getLength()));
316 
317   // When dealing with PCH.OBJ, some indices were already merged.
318   unsigned nbHeadIndices = indexMapStorage.size();
319 
320   std::optional<PCHMergerInfo> pchInfo;
321   if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
322                                        indexMapStorage, types, pchInfo))
323     fatal("codeview::mergeTypeAndIdRecords failed: " +
324           toString(std::move(err)));
325   if (pchInfo) {
326     file->pchSignature = pchInfo->PCHSignature;
327     endPrecompIdx = pchInfo->EndPrecompIndex;
328   }
329 
330   // In an object, there is only one mapping for both types and items.
331   tpiMap = indexMapStorage;
332   ipiMap = indexMapStorage;
333 
334   if (ctx.config.showSummary) {
335     nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
336     nbTypeRecordsBytes = reader.getLength();
337     // Count how many times we saw each type record in our input. This
338     // calculation requires a second pass over the type records to classify each
339     // record as a type or index. This is slow, but this code executes when
340     // collecting statistics.
341     m->tpiCounts.resize(m->getTypeTable().size());
342     m->ipiCounts.resize(m->getIDTable().size());
343     uint32_t srcIdx = nbHeadIndices;
344     for (const CVType &ty : types) {
345       TypeIndex dstIdx = tpiMap[srcIdx++];
346       // Type merging may fail, so a complex source type may become the simple
347       // NotTranslated type, which cannot be used as an array index.
348       if (dstIdx.isSimple())
349         continue;
350       SmallVectorImpl<uint32_t> &counts =
351           isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
352       ++counts[dstIdx.toArrayIndex()];
353     }
354   }
355 
356   return Error::success();
357 }
358 
359 // Merge types from a type server PDB.
mergeDebugT(TypeMerger * m)360 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
361   assert(!ctx.config.debugGHashes &&
362          "use remapTpiWithGHashes when ghash is enabled");
363 
364   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
365   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
366   if (auto e = expectedTpi.takeError())
367     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
368   pdb::TpiStream *maybeIpi = nullptr;
369   if (pdbFile.hasPDBIpiStream()) {
370     Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
371     if (auto e = expectedIpi.takeError())
372       fatal("Error getting type server IPI stream: " + toString(std::move(e)));
373     maybeIpi = &*expectedIpi;
374   }
375 
376   // Merge TPI first, because the IPI stream will reference type indices.
377   if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
378                                   expectedTpi->typeArray()))
379     fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
380   tpiMap = indexMapStorage;
381 
382   // Merge IPI.
383   if (maybeIpi) {
384     if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
385                                   maybeIpi->typeArray()))
386       fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
387     ipiMap = ipiSrc->indexMapStorage;
388   }
389 
390   if (ctx.config.showSummary) {
391     nbTypeRecords = tpiMap.size() + ipiMap.size();
392     nbTypeRecordsBytes =
393         expectedTpi->typeArray().getUnderlyingStream().getLength() +
394         (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
395                   : 0);
396 
397     // Count how many times we saw each type record in our input. If a
398     // destination type index is present in the source to destination type index
399     // map, that means we saw it once in the input. Add it to our histogram.
400     m->tpiCounts.resize(m->getTypeTable().size());
401     m->ipiCounts.resize(m->getIDTable().size());
402     for (TypeIndex ti : tpiMap)
403       if (!ti.isSimple())
404         ++m->tpiCounts[ti.toArrayIndex()];
405     for (TypeIndex ti : ipiMap)
406       if (!ti.isSimple())
407         ++m->ipiCounts[ti.toArrayIndex()];
408   }
409 
410   return Error::success();
411 }
412 
getTypeServerSource()413 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
414   const codeview::GUID &tsId = typeServerDependency.getGuid();
415   StringRef tsPath = typeServerDependency.getName();
416 
417   TypeServerSource *tsSrc = nullptr;
418   auto it = ctx.typeServerSourceMappings.find(tsId);
419   if (it != ctx.typeServerSourceMappings.end()) {
420     tsSrc = (TypeServerSource *)it->second;
421   }
422   if (tsSrc == nullptr) {
423     // The file failed to load, lookup by name
424     PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
425     if (!pdb)
426       return createFileError(tsPath, errorCodeToError(std::error_code(
427                                          ENOENT, std::generic_category())));
428     // If an error occurred during loading, throw it now
429     if (pdb->loadErrorStr)
430       return createFileError(
431           tsPath, make_error<StringError>(*pdb->loadErrorStr,
432                                           llvm::inconvertibleErrorCode()));
433 
434     tsSrc = (TypeServerSource *)pdb->debugTypesObj;
435 
436     // Just because a file with a matching name was found and it was an actual
437     // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
438     // must match the GUID specified in the TypeServer2 record.
439     if (tsSrc->Guid != tsId) {
440       return createFileError(tsPath,
441                              make_error<pdb::PDBError>(
442                                  pdb::pdb_error_code::signature_out_of_date));
443     }
444   }
445   return tsSrc;
446 }
447 
mergeDebugT(TypeMerger * m)448 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
449   Expected<TypeServerSource *> tsSrc = getTypeServerSource();
450   if (!tsSrc)
451     return tsSrc.takeError();
452 
453   pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
454   auto expectedInfo = pdbSession.getPDBInfoStream();
455   if (!expectedInfo)
456     return expectedInfo.takeError();
457 
458   // Reuse the type index map of the type server.
459   tpiMap = (*tsSrc)->tpiMap;
460   ipiMap = (*tsSrc)->ipiMap;
461   return Error::success();
462 }
463 
equalsPath(StringRef path1,StringRef path2)464 static bool equalsPath(StringRef path1, StringRef path2) {
465 #if defined(_WIN32)
466   return path1.equals_insensitive(path2);
467 #else
468   return path1.equals(path2);
469 #endif
470 }
471 
472 // Find by name an OBJ provided on the command line
findObjByName(StringRef fileNameOnly)473 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
474   SmallString<128> currentPath;
475   for (auto kv : ctx.precompSourceMappings) {
476     StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
477                                                     sys::path::Style::windows);
478 
479     // Compare based solely on the file name (link.exe behavior)
480     if (equalsPath(currentFileName, fileNameOnly))
481       return (PrecompSource *)kv.second;
482   }
483   return nullptr;
484 }
485 
findPrecompSource(ObjFile * file,PrecompRecord & pr)486 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
487                                                    PrecompRecord &pr) {
488   // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
489   // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
490   // the paths embedded in the OBJs are in the Windows format.
491   SmallString<128> prFileName =
492       sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
493 
494   auto it = ctx.precompSourceMappings.find(pr.getSignature());
495   if (it != ctx.precompSourceMappings.end()) {
496     return (PrecompSource *)it->second;
497   }
498   // Lookup by name
499   return findObjByName(prFileName);
500 }
501 
findPrecompMap(ObjFile * file,PrecompRecord & pr)502 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
503                                                            PrecompRecord &pr) {
504   PrecompSource *precomp = findPrecompSource(file, pr);
505 
506   if (!precomp)
507     return createFileError(
508         pr.getPrecompFilePath(),
509         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
510 
511   // Don't rely on the PCH signature to validate the concordance between the PCH
512   // and the OBJ that uses it. However we do validate here that the
513   // LF_ENDPRECOMP record index lines up with the number of type records
514   // LF_PRECOMP is expecting.
515   if (precomp->endPrecompIdx != pr.getTypesCount())
516     return createFileError(
517         toString(file),
518         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
519 
520   return precomp;
521 }
522 
523 /// Merges a precompiled headers TPI map into the current TPI map. The
524 /// precompiled headers object will also be loaded and remapped in the
525 /// process.
mergeInPrecompHeaderObj()526 Error UsePrecompSource::mergeInPrecompHeaderObj() {
527   auto e = findPrecompMap(file, precompDependency);
528   if (!e)
529     return e.takeError();
530 
531   PrecompSource *precompSrc = *e;
532   if (precompSrc->tpiMap.empty())
533     return Error::success();
534 
535   assert(precompDependency.getStartTypeIndex() ==
536          TypeIndex::FirstNonSimpleIndex);
537   assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
538   // Use the previously remapped index map from the precompiled headers.
539   indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
540                          precompSrc->tpiMap.begin() +
541                              precompDependency.getTypesCount());
542 
543   return Error::success();
544 }
545 
mergeDebugT(TypeMerger * m)546 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
547   // This object was compiled with /Yu, so process the corresponding
548   // precompiled headers object (/Yc) first. Some type indices in the current
549   // object are referencing data in the precompiled headers object, so we need
550   // both to be loaded.
551   if (Error e = mergeInPrecompHeaderObj())
552     return e;
553 
554   return TpiSource::mergeDebugT(m);
555 }
556 
mergeDebugT(TypeMerger * m)557 Error PrecompSource::mergeDebugT(TypeMerger *m) {
558   // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
559   // The signature comes later with the LF_ENDPRECOMP record, so we first need
560   // to merge in all the .PCH.OBJ file type records, before registering below.
561   if (Error e = TpiSource::mergeDebugT(m))
562     return e;
563 
564   registerMapping();
565 
566   return Error::success();
567 }
568 
registerMapping()569 void PrecompSource::registerMapping() {
570   if (registered)
571     return;
572   if (file->pchSignature && *file->pchSignature) {
573     auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
574     if (!it.second)
575       fatal("a PCH object with the same signature has already been provided (" +
576             toString(it.first->second->file) + " and " + toString(file) + ")");
577     registered = true;
578   }
579 }
580 
581 //===----------------------------------------------------------------------===//
582 // Parellel GHash type merging implementation.
583 //===----------------------------------------------------------------------===//
584 
loadGHashes()585 void TpiSource::loadGHashes() {
586   if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
587     ghashes = getHashesFromDebugH(*debugH);
588     ownedGHashes = false;
589   } else {
590     CVTypeArray types;
591     BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
592     cantFail(reader.readArray(types, reader.getLength()));
593     assignGHashesFromVector(GloballyHashedType::hashTypes(types));
594   }
595 
596   fillIsItemIndexFromDebugT();
597 }
598 
599 // Copies ghashes from a vector into an array. These are long lived, so it's
600 // worth the time to copy these into an appropriately sized vector to reduce
601 // memory usage.
assignGHashesFromVector(std::vector<GloballyHashedType> && hashVec)602 void TpiSource::assignGHashesFromVector(
603     std::vector<GloballyHashedType> &&hashVec) {
604   if (hashVec.empty())
605     return;
606   GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
607   memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
608   ghashes = ArrayRef(hashes, hashVec.size());
609   ownedGHashes = true;
610 }
611 
612 // Faster way to iterate type records. forEachTypeChecked is faster than
613 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
forEachTypeChecked(ArrayRef<uint8_t> types,function_ref<void (const CVType &)> fn)614 static void forEachTypeChecked(ArrayRef<uint8_t> types,
615                                function_ref<void(const CVType &)> fn) {
616   checkError(
617       forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
618         fn(ty);
619         return Error::success();
620       }));
621 }
622 
623 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
624 // TODO: Store this information in .debug$H so that we don't have to recompute
625 // it. This is the main bottleneck slowing down parallel ghashing with one
626 // thread over single-threaded ghashing.
fillIsItemIndexFromDebugT()627 void TpiSource::fillIsItemIndexFromDebugT() {
628   uint32_t index = 0;
629   isItemIndex.resize(ghashes.size());
630   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
631     if (isIdRecord(ty.kind()))
632       isItemIndex.set(index);
633     ++index;
634   });
635 }
636 
mergeTypeRecord(TypeIndex curIndex,CVType ty)637 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
638   // Decide if the merged type goes into TPI or IPI.
639   bool isItem = isIdRecord(ty.kind());
640   MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
641 
642   // Copy the type into our mutable buffer.
643   assert(ty.length() <= codeview::MaxRecordLength);
644   size_t offset = merged.recs.size();
645   size_t newSize = alignTo(ty.length(), 4);
646   merged.recs.resize(offset + newSize);
647   auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
648   memcpy(newRec.data(), ty.data().data(), newSize);
649 
650   // Fix up the record prefix and padding bytes if it required resizing.
651   if (newSize != ty.length()) {
652     reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
653     for (size_t i = ty.length(); i < newSize; ++i)
654       newRec[i] = LF_PAD0 + (newSize - i);
655   }
656 
657   // Remap the type indices in the new record.
658   remapTypesInTypeRecord(newRec);
659   uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
660   merged.recSizes.push_back(static_cast<uint16_t>(newSize));
661   merged.recHashes.push_back(pdbHash);
662 
663   // Retain a mapping from PDB function id to PDB function type. This mapping is
664   // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
665   // symbols.
666   if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
667     bool success = ty.length() >= 12;
668     TypeIndex funcId = curIndex;
669     if (success)
670       success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
671     TypeIndex funcType =
672         *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
673     if (success) {
674       funcIdToType.push_back({funcId, funcType});
675     } else {
676       StringRef fname = file ? file->getName() : "<unknown PDB>";
677       warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
678            " in " + fname);
679     }
680   }
681 }
682 
mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,TypeIndex beginIndex)683 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
684                                        TypeIndex beginIndex) {
685   // Re-sort the list of unique types by index.
686   if (kind == PDB)
687     assert(llvm::is_sorted(uniqueTypes));
688   else
689     llvm::sort(uniqueTypes);
690 
691   // Accumulate all the unique types into one buffer in mergedTypes.
692   uint32_t ghashIndex = 0;
693   auto nextUniqueIndex = uniqueTypes.begin();
694   assert(mergedTpi.recs.empty());
695   assert(mergedIpi.recs.empty());
696 
697   // Pre-compute the number of elements in advance to avoid std::vector resizes.
698   unsigned nbTpiRecs = 0;
699   unsigned nbIpiRecs = 0;
700   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
701     if (nextUniqueIndex != uniqueTypes.end() &&
702         *nextUniqueIndex == ghashIndex) {
703       assert(ty.length() <= codeview::MaxRecordLength);
704       size_t newSize = alignTo(ty.length(), 4);
705       (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
706       ++nextUniqueIndex;
707     }
708     ++ghashIndex;
709   });
710   mergedTpi.recs.reserve(nbTpiRecs);
711   mergedIpi.recs.reserve(nbIpiRecs);
712 
713   // Do the actual type merge.
714   ghashIndex = 0;
715   nextUniqueIndex = uniqueTypes.begin();
716   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
717     if (nextUniqueIndex != uniqueTypes.end() &&
718         *nextUniqueIndex == ghashIndex) {
719       mergeTypeRecord(beginIndex + ghashIndex, ty);
720       ++nextUniqueIndex;
721     }
722     ++ghashIndex;
723   });
724   assert(nextUniqueIndex == uniqueTypes.end() &&
725          "failed to merge all desired records");
726   assert(uniqueTypes.size() ==
727              mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
728          "missing desired record");
729 }
730 
remapTpiWithGHashes(GHashState * g)731 void TpiSource::remapTpiWithGHashes(GHashState *g) {
732   assert(ctx.config.debugGHashes && "ghashes must be enabled");
733   fillMapFromGHashes(g);
734   tpiMap = indexMapStorage;
735   ipiMap = indexMapStorage;
736   mergeUniqueTypeRecords(file->debugTypes);
737   // TODO: Free all unneeded ghash resources now that we have a full index map.
738 
739   if (ctx.config.showSummary) {
740     nbTypeRecords = ghashes.size();
741     nbTypeRecordsBytes = file->debugTypes.size();
742   }
743 }
744 
745 // PDBs do not actually store global hashes, so when merging a type server
746 // PDB we have to synthesize global hashes.  To do this, we first synthesize
747 // global hashes for the TPI stream, since it is independent, then we
748 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
749 // as inputs.
loadGHashes()750 void TypeServerSource::loadGHashes() {
751   // Don't hash twice.
752   if (!ghashes.empty())
753     return;
754   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
755 
756   // Hash TPI stream.
757   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
758   if (auto e = expectedTpi.takeError())
759     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
760   assignGHashesFromVector(
761       GloballyHashedType::hashTypes(expectedTpi->typeArray()));
762   isItemIndex.resize(ghashes.size());
763 
764   // Hash IPI stream, which depends on TPI ghashes.
765   if (!pdbFile.hasPDBIpiStream())
766     return;
767   Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
768   if (auto e = expectedIpi.takeError())
769     fatal("error retrieving IPI stream: " + toString(std::move(e)));
770   ipiSrc->assignGHashesFromVector(
771       GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
772 
773   // The IPI stream isItemIndex bitvector should be all ones.
774   ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
775   ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
776 }
777 
778 // Flatten discontiguous PDB type arrays to bytes so that we can use
779 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
780 // type servers is faster than iterating all object files compiled with /Z7 with
781 // CVTypeArray, which has high overheads due to the virtual interface of
782 // BinaryStream::readBytes.
typeArrayToBytes(const CVTypeArray & types)783 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
784   BinaryStreamRef stream = types.getUnderlyingStream();
785   ArrayRef<uint8_t> debugTypes;
786   checkError(stream.readBytes(0, stream.getLength(), debugTypes));
787   return debugTypes;
788 }
789 
790 // Merge types from a type server PDB.
remapTpiWithGHashes(GHashState * g)791 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
792   assert(ctx.config.debugGHashes && "ghashes must be enabled");
793 
794   // IPI merging depends on TPI, so do TPI first, then do IPI.  No need to
795   // propagate errors, those should've been handled during ghash loading.
796   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
797   pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
798   fillMapFromGHashes(g);
799   tpiMap = indexMapStorage;
800   mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
801   if (pdbFile.hasPDBIpiStream()) {
802     pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
803     ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
804     ipiSrc->fillMapFromGHashes(g);
805     ipiMap = ipiSrc->indexMapStorage;
806     ipiSrc->tpiMap = tpiMap;
807     ipiSrc->ipiMap = ipiMap;
808     ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
809 
810     if (ctx.config.showSummary) {
811       nbTypeRecords = ipiSrc->ghashes.size();
812       nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
813     }
814   }
815 
816   if (ctx.config.showSummary) {
817     nbTypeRecords += ghashes.size();
818     nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
819   }
820 }
821 
remapTpiWithGHashes(GHashState * g)822 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
823   // No remapping to do with /Zi objects. Simply use the index map from the type
824   // server. Errors should have been reported earlier. Symbols from this object
825   // will be ignored.
826   Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
827   if (!maybeTsSrc) {
828     typeMergingError =
829         joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
830     return;
831   }
832   TypeServerSource *tsSrc = *maybeTsSrc;
833   tpiMap = tsSrc->tpiMap;
834   ipiMap = tsSrc->ipiMap;
835 }
836 
loadGHashes()837 void PrecompSource::loadGHashes() {
838   if (getDebugH(file)) {
839     warn("ignoring .debug$H section; pch with ghash is not implemented");
840   }
841 
842   uint32_t ghashIdx = 0;
843   std::vector<GloballyHashedType> hashVec;
844   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
845     // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
846     // the PDB. There must be an entry in the list of ghashes so that the type
847     // indexes of the following records in the /Yc PCH object line up.
848     if (ty.kind() == LF_ENDPRECOMP) {
849       EndPrecompRecord endPrecomp;
850       cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
851           const_cast<CVType &>(ty), endPrecomp));
852       file->pchSignature = endPrecomp.getSignature();
853       registerMapping();
854       endPrecompIdx = ghashIdx;
855     }
856 
857     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
858     isItemIndex.push_back(isIdRecord(ty.kind()));
859     ++ghashIdx;
860   });
861   assignGHashesFromVector(std::move(hashVec));
862 }
863 
loadGHashes()864 void UsePrecompSource::loadGHashes() {
865   auto e = findPrecompMap(file, precompDependency);
866   if (!e) {
867     warn(toString(e.takeError()));
868     return;
869   }
870 
871   PrecompSource *pchSrc = *e;
872 
873   // To compute ghashes of a /Yu object file, we need to build on the ghashes of
874   // the /Yc PCH object. After we are done hashing, discard the ghashes from the
875   // PCH source so we don't unnecessarily try to deduplicate them.
876   std::vector<GloballyHashedType> hashVec =
877       pchSrc->ghashes.take_front(precompDependency.getTypesCount());
878   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
879     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
880     isItemIndex.push_back(isIdRecord(ty.kind()));
881   });
882   hashVec.erase(hashVec.begin(),
883                 hashVec.begin() + precompDependency.getTypesCount());
884   assignGHashesFromVector(std::move(hashVec));
885 }
886 
remapTpiWithGHashes(GHashState * g)887 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
888   fillMapFromGHashes(g);
889   // This object was compiled with /Yu, so process the corresponding
890   // precompiled headers object (/Yc) first. Some type indices in the current
891   // object are referencing data in the precompiled headers object, so we need
892   // both to be loaded.
893   if (Error e = mergeInPrecompHeaderObj()) {
894     typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
895     return;
896   }
897 
898   tpiMap = indexMapStorage;
899   ipiMap = indexMapStorage;
900   mergeUniqueTypeRecords(file->debugTypes,
901                          TypeIndex(precompDependency.getStartTypeIndex() +
902                                    precompDependency.getTypesCount()));
903   if (ctx.config.showSummary) {
904     nbTypeRecords = ghashes.size();
905     nbTypeRecordsBytes = file->debugTypes.size();
906   }
907 }
908 
909 namespace {
910 /// A concurrent hash table for global type hashing. It is based on this paper:
911 /// Concurrent Hash Tables: Fast and General(?)!
912 /// https://dl.acm.org/doi/10.1145/3309206
913 ///
914 /// This hash table is meant to be used in two phases:
915 /// 1. concurrent insertions
916 /// 2. concurrent reads
917 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
918 ///
919 /// The paper describes storing a key-value pair in two machine words.
920 /// Generally, the values stored in this map are type indices, and we can use
921 /// those values to recover the ghash key from a side table. This allows us to
922 /// shrink the table entries further at the cost of some loads, and sidesteps
923 /// the need for a 128 bit atomic compare-and-swap operation.
924 ///
925 /// During insertion, a priority function is used to decide which insertion
926 /// should be preferred. This ensures that the output is deterministic. For
927 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
928 ///
929 class GHashCell;
930 struct GHashTable {
931   GHashCell *table = nullptr;
932   uint32_t tableSize = 0;
933 
934   GHashTable() = default;
935   ~GHashTable();
936 
937   /// Initialize the table with the given size. Because the table cannot be
938   /// resized, the initial size of the table must be large enough to contain all
939   /// inputs, or insertion may not be able to find an empty cell.
940   void init(uint32_t newTableSize);
941 
942   /// Insert the cell with the given ghash into the table. Return the insertion
943   /// position in the table. It is safe for the caller to store the insertion
944   /// position because the table cannot be resized.
945   uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
946                   GHashCell newCell);
947 };
948 
949 /// A ghash table cell for deduplicating types from TpiSources.
950 class GHashCell {
951   // Force "data" to be 64-bit aligned; otherwise, some versions of clang
952   // will generate calls to libatomic when using some versions of libstdc++
953   // on 32-bit targets.  (Also, in theory, there could be a target where
954   // new[] doesn't always return an 8-byte-aligned allocation.)
955   alignas(sizeof(uint64_t)) uint64_t data = 0;
956 
957 public:
958   GHashCell() = default;
959 
960   // Construct data most to least significant so that sorting works well:
961   // - isItem
962   // - tpiSrcIdx
963   // - ghashIdx
964   // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
965   // non-zero representation.
GHashCell(bool isItem,uint32_t tpiSrcIdx,uint32_t ghashIdx)966   GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
967       : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
968              ghashIdx) {
969     assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
970     assert(ghashIdx == getGHashIdx() && "round trip failure");
971   }
972 
GHashCell(uint64_t data)973   explicit GHashCell(uint64_t data) : data(data) {}
974 
975   // The empty cell is all zeros.
isEmpty() const976   bool isEmpty() const { return data == 0ULL; }
977 
978   /// Extract the tpiSrcIdx.
getTpiSrcIdx() const979   uint32_t getTpiSrcIdx() const {
980     return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
981   }
982 
983   /// Extract the index into the ghash array of the TpiSource.
getGHashIdx() const984   uint32_t getGHashIdx() const { return (uint32_t)data; }
985 
isItem() const986   bool isItem() const { return data & (1ULL << 63U); }
987 
988   /// Get the ghash key for this cell.
getGHash(const COFFLinkerContext & ctx) const989   GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
990     return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
991   }
992 
993   /// The priority function for the cell. The data is stored such that lower
994   /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
995   /// from earlier sources are more likely to prevail.
operator <(const GHashCell & l,const GHashCell & r)996   friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
997     return l.data < r.data;
998   }
999 };
1000 } // namespace
1001 
1002 namespace lld::coff {
1003 /// This type is just a wrapper around GHashTable with external linkage so it
1004 /// can be used from a header.
1005 struct GHashState {
1006   GHashTable table;
1007 };
1008 } // namespace lld::coff
1009 
~GHashTable()1010 GHashTable::~GHashTable() { delete[] table; }
1011 
init(uint32_t newTableSize)1012 void GHashTable::init(uint32_t newTableSize) {
1013   table = new GHashCell[newTableSize];
1014   memset(table, 0, newTableSize * sizeof(GHashCell));
1015   tableSize = newTableSize;
1016 }
1017 
insert(COFFLinkerContext & ctx,GloballyHashedType ghash,GHashCell newCell)1018 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1019                             GHashCell newCell) {
1020   assert(!newCell.isEmpty() && "cannot insert empty cell value");
1021 
1022   // FIXME: The low bytes of SHA1 have low entropy for short records, which
1023   // type records are. Swap the byte order for better entropy. A better ghash
1024   // won't need this.
1025   uint32_t startIdx =
1026       llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
1027       tableSize;
1028 
1029   // Do a linear probe starting at startIdx.
1030   uint32_t idx = startIdx;
1031   while (true) {
1032     // Run a compare and swap loop. There are four cases:
1033     // - cell is empty: CAS into place and return
1034     // - cell has matching key, earlier priority: do nothing, return
1035     // - cell has matching key, later priority: CAS into place and return
1036     // - cell has non-matching key: hash collision, probe next cell
1037     auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
1038     GHashCell oldCell(cellPtr->load());
1039     while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
1040       // Check if there is an existing ghash entry with a higher priority
1041       // (earlier ordering). If so, this is a duplicate, we are done.
1042       if (!oldCell.isEmpty() && oldCell < newCell)
1043         return idx;
1044       // Either the cell is empty, or our value is higher priority. Try to
1045       // compare and swap. If it succeeds, we are done.
1046       if (cellPtr->compare_exchange_weak(oldCell, newCell))
1047         return idx;
1048       // If the CAS failed, check this cell again.
1049     }
1050 
1051     // Advance the probe. Wrap around to the beginning if we run off the end.
1052     ++idx;
1053     idx = idx == tableSize ? 0 : idx;
1054     if (idx == startIdx) {
1055       // If this becomes an issue, we could mark failure and rehash from the
1056       // beginning with a bigger table. There is no difference between rehashing
1057       // internally and starting over.
1058       report_fatal_error("ghash table is full");
1059     }
1060   }
1061   llvm_unreachable("left infloop");
1062 }
1063 
TypeMerger(COFFLinkerContext & c,llvm::BumpPtrAllocator & alloc)1064 TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1065     : typeTable(alloc), idTable(alloc), ctx(c) {}
1066 
1067 TypeMerger::~TypeMerger() = default;
1068 
mergeTypesWithGHash()1069 void TypeMerger::mergeTypesWithGHash() {
1070   // Load ghashes. Do type servers and PCH objects first.
1071   {
1072     llvm::TimeTraceScope timeScope("Load GHASHes");
1073     ScopedTimer t1(ctx.loadGHashTimer);
1074     parallelForEach(dependencySources,
1075                     [&](TpiSource *source) { source->loadGHashes(); });
1076     parallelForEach(objectSources,
1077                     [&](TpiSource *source) { source->loadGHashes(); });
1078   }
1079 
1080   llvm::TimeTraceScope timeScope("Merge types (GHASH)");
1081   ScopedTimer t2(ctx.mergeGHashTimer);
1082   GHashState ghashState;
1083 
1084   // Estimate the size of hash table needed to deduplicate ghashes. This *must*
1085   // be larger than the number of unique types, or hash table insertion may not
1086   // be able to find a vacant slot. Summing the input types guarantees this, but
1087   // it is a gross overestimate. The table size could be reduced to save memory,
1088   // but it would require implementing rehashing, and this table is generally
1089   // small compared to total memory usage, at eight bytes per input type record,
1090   // and most input type records are larger than eight bytes.
1091   size_t tableSize = 0;
1092   for (TpiSource *source : ctx.tpiSourceList)
1093     tableSize += source->ghashes.size();
1094 
1095   // Cap the table size so that we can use 32-bit cell indices. Type indices are
1096   // also 32-bit, so this is an inherent PDB file format limit anyway.
1097   tableSize =
1098       std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1099   ghashState.table.init(static_cast<uint32_t>(tableSize));
1100 
1101   // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1102   // the contents of the hash table cell, but we can remember the insertion
1103   // position. Because the table does not rehash, the position will not change
1104   // under insertion. After insertion is done, the value of the cell can be read
1105   // to retrieve the final PDB type index.
1106   parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
1107     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1108     source->indexMapStorage.resize(source->ghashes.size());
1109     for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1110       if (source->shouldOmitFromPdb(i)) {
1111         source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1112         continue;
1113       }
1114       GloballyHashedType ghash = source->ghashes[i];
1115       bool isItem = source->isItemIndex.test(i);
1116       uint32_t cellIdx =
1117           ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
1118 
1119       // Store the ghash cell index as a type index in indexMapStorage. Later
1120       // we will replace it with the PDB type index.
1121       source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1122     }
1123   });
1124 
1125   // Collect all non-empty cells and sort them. This will implicitly assign
1126   // destination type indices, and partition the entries into type records and
1127   // item records. It arranges types in this order:
1128   // - type records
1129   //   - source 0, type 0...
1130   //   - source 1, type 1...
1131   // - item records
1132   //   - source 0, type 1...
1133   //   - source 1, type 0...
1134   std::vector<GHashCell> entries;
1135   for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
1136     if (!cell.isEmpty())
1137       entries.push_back(cell);
1138   }
1139   parallelSort(entries, std::less<GHashCell>());
1140   log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1141               tableSize ? double(entries.size()) / tableSize : 0,
1142               entries.size(), tableSize));
1143 
1144   // Find out how many type and item indices there are.
1145   auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
1146   assert((mid == entries.end() || mid->isItem()) &&
1147          (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1148          "midpoint is not midpoint");
1149   uint32_t numTypes = std::distance(entries.begin(), mid);
1150   uint32_t numItems = std::distance(mid, entries.end());
1151   log("Tpi record count: " + Twine(numTypes));
1152   log("Ipi record count: " + Twine(numItems));
1153 
1154   // Make a list of the "unique" type records to merge for each tpi source. Type
1155   // merging will skip indices not on this list. Store the destination PDB type
1156   // index for these unique types in the tpiMap for each source. The entries for
1157   // non-unique types will be filled in prior to type merging.
1158   for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1159     auto &cell = entries[i];
1160     uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1161     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1162     source->uniqueTypes.push_back(cell.getGHashIdx());
1163 
1164     // Update the ghash table to store the destination PDB type index in the
1165     // table.
1166     uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1167     uint32_t ghashCellIndex =
1168         source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1169     ghashState.table.table[ghashCellIndex] =
1170         GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1171   }
1172 
1173   // In parallel, remap all types.
1174   for (TpiSource *source : dependencySources)
1175     source->remapTpiWithGHashes(&ghashState);
1176   parallelForEach(objectSources, [&](TpiSource *source) {
1177     source->remapTpiWithGHashes(&ghashState);
1178   });
1179 
1180   // Build a global map of from function ID to function type.
1181   for (TpiSource *source : ctx.tpiSourceList) {
1182     for (auto idToType : source->funcIdToType)
1183       funcIdToType.insert(idToType);
1184     source->funcIdToType.clear();
1185   }
1186 
1187   clearGHashes();
1188 }
1189 
sortDependencies()1190 void TypeMerger::sortDependencies() {
1191   // Order dependencies first, but preserve the existing order.
1192   std::vector<TpiSource *> deps;
1193   std::vector<TpiSource *> objs;
1194   for (TpiSource *s : ctx.tpiSourceList)
1195     (s->isDependency() ? deps : objs).push_back(s);
1196   uint32_t numDeps = deps.size();
1197   uint32_t numObjs = objs.size();
1198   ctx.tpiSourceList = std::move(deps);
1199   ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1200   for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1201     ctx.tpiSourceList[i]->tpiSrcIdx = i;
1202   dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1203   objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1204 }
1205 
1206 /// Given the index into the ghash table for a particular type, return the type
1207 /// index for that type in the output PDB.
loadPdbTypeIndexFromCell(GHashState * g,uint32_t ghashCellIdx)1208 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1209                                           uint32_t ghashCellIdx) {
1210   GHashCell cell = g->table.table[ghashCellIdx];
1211   return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1212 }
1213 
1214 /// Free heap allocated ghashes.
clearGHashes()1215 void TypeMerger::clearGHashes() {
1216   for (TpiSource *src : ctx.tpiSourceList) {
1217     if (src->ownedGHashes)
1218       delete[] src->ghashes.data();
1219     src->ghashes = {};
1220     src->isItemIndex.clear();
1221     src->uniqueTypes.clear();
1222   }
1223 }
1224 
1225 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1226 // ghash to lookup its final type index in the PDB, and store that in the map.
fillMapFromGHashes(GHashState * g)1227 void TpiSource::fillMapFromGHashes(GHashState *g) {
1228   for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1229     TypeIndex fakeCellIndex = indexMapStorage[i];
1230     if (fakeCellIndex.isSimple())
1231       indexMapStorage[i] = fakeCellIndex;
1232     else
1233       indexMapStorage[i] =
1234           loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1235   }
1236 }
1237