xref: /openbsd/gnu/llvm/lld/COFF/DebugTypes.cpp (revision dfe94b16)
1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DebugTypes.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Driver.h"
13 #include "InputFiles.h"
14 #include "PDB.h"
15 #include "TypeMerger.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22 #include "llvm/DebugInfo/PDB/GenericError.h"
23 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/Parallel.h"
30 #include "llvm/Support/Path.h"
31 
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace lld;
35 using namespace lld::coff;
36 
37 namespace {
38 class TypeServerIpiSource;
39 
40 // The TypeServerSource class represents a PDB type server, a file referenced by
41 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
42 // files, therefore there must be only once instance per OBJ lot. The file path
43 // is discovered from the dependent OBJ's debug type stream. The
44 // TypeServerSource object is then queued and loaded by the COFF Driver. The
45 // debug type stream for such PDB files will be merged first in the final PDB,
46 // before any dependent OBJ.
47 class TypeServerSource : public TpiSource {
48 public:
TypeServerSource(COFFLinkerContext & ctx,PDBInputFile * f)49   explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
50       : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
51     if (f->loadErrorStr)
52       return;
53     pdb::PDBFile &file = f->session->getPDBFile();
54     auto expectedInfo = file.getPDBInfoStream();
55     if (!expectedInfo)
56       return;
57     Guid = expectedInfo->getGuid();
58     auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
59     if (!it.second) {
60       // If we hit here we have collision on Guid's in two PDB files.
61       // This can happen if the PDB Guid is invalid or if we are really
62       // unlucky. This should fall back on stright file-system lookup.
63       it.first->second = nullptr;
64     }
65   }
66 
67   Error mergeDebugT(TypeMerger *m) override;
68 
69   void loadGHashes() override;
70   void remapTpiWithGHashes(GHashState *g) override;
71 
isDependency() const72   bool isDependency() const override { return true; }
73 
74   PDBInputFile *pdbInputFile = nullptr;
75 
76   // TpiSource for IPI stream.
77   TypeServerIpiSource *ipiSrc = nullptr;
78 
79   // The PDB signature GUID.
80   codeview::GUID Guid;
81 };
82 
83 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
84 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
85 // invariant of one type index space per source.
86 class TypeServerIpiSource : public TpiSource {
87 public:
TypeServerIpiSource(COFFLinkerContext & ctx)88   explicit TypeServerIpiSource(COFFLinkerContext &ctx)
89       : TpiSource(ctx, PDBIpi, nullptr) {}
90 
91   friend class TypeServerSource;
92 
93   // All of the TpiSource methods are no-ops. The parent TypeServerSource
94   // handles both TPI and IPI.
mergeDebugT(TypeMerger * m)95   Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
loadGHashes()96   void loadGHashes() override {}
remapTpiWithGHashes(GHashState * g)97   void remapTpiWithGHashes(GHashState *g) override {}
isDependency() const98   bool isDependency() const override { return true; }
99 };
100 
101 // This class represents the debug type stream of an OBJ file that depends on a
102 // PDB type server (see TypeServerSource).
103 class UseTypeServerSource : public TpiSource {
104   Expected<TypeServerSource *> getTypeServerSource();
105 
106 public:
UseTypeServerSource(COFFLinkerContext & ctx,ObjFile * f,TypeServer2Record ts)107   UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
108       : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
109 
110   Error mergeDebugT(TypeMerger *m) override;
111 
112   // No need to load ghashes from /Zi objects.
loadGHashes()113   void loadGHashes() override {}
114   void remapTpiWithGHashes(GHashState *g) override;
115 
116   // Information about the PDB type server dependency, that needs to be loaded
117   // in before merging this OBJ.
118   TypeServer2Record typeServerDependency;
119 };
120 
121 // This class represents the debug type stream of a Microsoft precompiled
122 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
123 // PDB, before any other OBJs that depend on this. Note that only MSVC generate
124 // such files, clang does not.
125 class PrecompSource : public TpiSource {
126 public:
PrecompSource(COFFLinkerContext & ctx,ObjFile * f)127   PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
128     // If the S_OBJNAME record contains the PCH signature, we'll register this
129     // source file right away.
130     registerMapping();
131   }
132 
133   Error mergeDebugT(TypeMerger *m) override;
134 
135   void loadGHashes() override;
136 
isDependency() const137   bool isDependency() const override { return true; }
138 
139 private:
140   void registerMapping();
141 
142   // Whether this precomp OBJ was recorded in the precompSourceMappings map.
143   // Only happens if the file->pchSignature is valid.
144   bool registered = false;
145 };
146 
147 // This class represents the debug type stream of an OBJ file that depends on a
148 // Microsoft precompiled headers OBJ (see PrecompSource).
149 class UsePrecompSource : public TpiSource {
150 public:
UsePrecompSource(COFFLinkerContext & ctx,ObjFile * f,PrecompRecord precomp)151   UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
152       : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
153 
154   Error mergeDebugT(TypeMerger *m) override;
155 
156   void loadGHashes() override;
157   void remapTpiWithGHashes(GHashState *g) override;
158 
159 private:
160   Error mergeInPrecompHeaderObj();
161 
162   PrecompSource *findObjByName(StringRef fileNameOnly);
163   PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
164   Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
165 
166 public:
167   // Information about the Precomp OBJ dependency, that needs to be loaded in
168   // before merging this OBJ.
169   PrecompRecord precompDependency;
170 };
171 } // namespace
172 
TpiSource(COFFLinkerContext & ctx,TpiKind k,ObjFile * f)173 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
174     : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
175   ctx.addTpiSource(this);
176 }
177 
178 // Vtable key method.
~TpiSource()179 TpiSource::~TpiSource() {
180   // Silence any assertions about unchecked errors.
181   consumeError(std::move(typeMergingError));
182 }
183 
makeTpiSource(COFFLinkerContext & ctx,ObjFile * file)184 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
185   return make<TpiSource>(ctx, TpiSource::Regular, file);
186 }
187 
makeTypeServerSource(COFFLinkerContext & ctx,PDBInputFile * pdbInputFile)188 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
189                                            PDBInputFile *pdbInputFile) {
190   // Type server sources come in pairs: the TPI stream, and the IPI stream.
191   auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
192   if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
193     tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
194   return tpiSource;
195 }
196 
makeUseTypeServerSource(COFFLinkerContext & ctx,ObjFile * file,TypeServer2Record ts)197 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
198                                               ObjFile *file,
199                                               TypeServer2Record ts) {
200   return make<UseTypeServerSource>(ctx, file, ts);
201 }
202 
makePrecompSource(COFFLinkerContext & ctx,ObjFile * file)203 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
204   return make<PrecompSource>(ctx, file);
205 }
206 
makeUsePrecompSource(COFFLinkerContext & ctx,ObjFile * file,PrecompRecord precomp)207 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
208                                            ObjFile *file,
209                                            PrecompRecord precomp) {
210   return make<UsePrecompSource>(ctx, file, precomp);
211 }
212 
remapTypeIndex(TypeIndex & ti,TiRefKind refKind) const213 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
214   if (ti.isSimple())
215     return true;
216 
217   // This can be an item index or a type index. Choose the appropriate map.
218   ArrayRef<TypeIndex> tpiOrIpiMap =
219       (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
220   if (ti.toArrayIndex() >= tpiOrIpiMap.size())
221     return false;
222   ti = tpiOrIpiMap[ti.toArrayIndex()];
223   return true;
224 }
225 
remapRecord(MutableArrayRef<uint8_t> rec,ArrayRef<TiReference> typeRefs)226 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
227                             ArrayRef<TiReference> typeRefs) {
228   MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
229   for (const TiReference &ref : typeRefs) {
230     unsigned byteSize = ref.Count * sizeof(TypeIndex);
231     if (contents.size() < ref.Offset + byteSize)
232       fatal("symbol record too short");
233 
234     MutableArrayRef<TypeIndex> indices(
235         reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
236     for (TypeIndex &ti : indices) {
237       if (!remapTypeIndex(ti, ref.Kind)) {
238         if (ctx.config.verbose) {
239           uint16_t kind =
240               reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
241           StringRef fname = file ? file->getName() : "<unknown PDB>";
242           log("failed to remap type index in record of kind 0x" +
243               utohexstr(kind) + " in " + fname + " with bad " +
244               (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
245               " index 0x" + utohexstr(ti.getIndex()));
246         }
247         ti = TypeIndex(SimpleTypeKind::NotTranslated);
248         continue;
249       }
250     }
251   }
252 }
253 
remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec)254 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
255   // TODO: Handle errors similar to symbols.
256   SmallVector<TiReference, 32> typeRefs;
257   discoverTypeIndices(CVType(rec), typeRefs);
258   remapRecord(rec, typeRefs);
259 }
260 
remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec)261 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
262   // Discover type index references in the record. Skip it if we don't
263   // know where they are.
264   SmallVector<TiReference, 32> typeRefs;
265   if (!discoverTypeIndicesInSymbol(rec, typeRefs))
266     return false;
267   remapRecord(rec, typeRefs);
268   return true;
269 }
270 
271 // A COFF .debug$H section is currently a clang extension.  This function checks
272 // if a .debug$H section is in a format that we expect / understand, so that we
273 // can ignore any sections which are coincidentally also named .debug$H but do
274 // not contain a format we recognize.
canUseDebugH(ArrayRef<uint8_t> debugH)275 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
276   if (debugH.size() < sizeof(object::debug_h_header))
277     return false;
278   auto *header =
279       reinterpret_cast<const object::debug_h_header *>(debugH.data());
280   debugH = debugH.drop_front(sizeof(object::debug_h_header));
281   return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
282          header->Version == 0 &&
283          header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
284          (debugH.size() % 8 == 0);
285 }
286 
getDebugH(ObjFile * file)287 static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
288   SectionChunk *sec =
289       SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
290   if (!sec)
291     return std::nullopt;
292   ArrayRef<uint8_t> contents = sec->getContents();
293   if (!canUseDebugH(contents))
294     return std::nullopt;
295   return contents;
296 }
297 
298 static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH)299 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
300   assert(canUseDebugH(debugH));
301   debugH = debugH.drop_front(sizeof(object::debug_h_header));
302   uint32_t count = debugH.size() / sizeof(GloballyHashedType);
303   return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
304 }
305 
306 // Merge .debug$T for a generic object file.
mergeDebugT(TypeMerger * m)307 Error TpiSource::mergeDebugT(TypeMerger *m) {
308   assert(!ctx.config.debugGHashes &&
309          "use remapTpiWithGHashes when ghash is enabled");
310 
311   CVTypeArray types;
312   BinaryStreamReader reader(file->debugTypes, support::little);
313   cantFail(reader.readArray(types, reader.getLength()));
314 
315   // When dealing with PCH.OBJ, some indices were already merged.
316   unsigned nbHeadIndices = indexMapStorage.size();
317 
318   std::optional<PCHMergerInfo> pchInfo;
319   if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
320                                        indexMapStorage, types, pchInfo))
321     fatal("codeview::mergeTypeAndIdRecords failed: " +
322           toString(std::move(err)));
323   if (pchInfo) {
324     file->pchSignature = pchInfo->PCHSignature;
325     endPrecompIdx = pchInfo->EndPrecompIndex;
326   }
327 
328   // In an object, there is only one mapping for both types and items.
329   tpiMap = indexMapStorage;
330   ipiMap = indexMapStorage;
331 
332   if (ctx.config.showSummary) {
333     nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
334     nbTypeRecordsBytes = reader.getLength();
335     // Count how many times we saw each type record in our input. This
336     // calculation requires a second pass over the type records to classify each
337     // record as a type or index. This is slow, but this code executes when
338     // collecting statistics.
339     m->tpiCounts.resize(m->getTypeTable().size());
340     m->ipiCounts.resize(m->getIDTable().size());
341     uint32_t srcIdx = nbHeadIndices;
342     for (const CVType &ty : types) {
343       TypeIndex dstIdx = tpiMap[srcIdx++];
344       // Type merging may fail, so a complex source type may become the simple
345       // NotTranslated type, which cannot be used as an array index.
346       if (dstIdx.isSimple())
347         continue;
348       SmallVectorImpl<uint32_t> &counts =
349           isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
350       ++counts[dstIdx.toArrayIndex()];
351     }
352   }
353 
354   return Error::success();
355 }
356 
357 // Merge types from a type server PDB.
mergeDebugT(TypeMerger * m)358 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
359   assert(!ctx.config.debugGHashes &&
360          "use remapTpiWithGHashes when ghash is enabled");
361 
362   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
363   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
364   if (auto e = expectedTpi.takeError())
365     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
366   pdb::TpiStream *maybeIpi = nullptr;
367   if (pdbFile.hasPDBIpiStream()) {
368     Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
369     if (auto e = expectedIpi.takeError())
370       fatal("Error getting type server IPI stream: " + toString(std::move(e)));
371     maybeIpi = &*expectedIpi;
372   }
373 
374   // Merge TPI first, because the IPI stream will reference type indices.
375   if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
376                                   expectedTpi->typeArray()))
377     fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
378   tpiMap = indexMapStorage;
379 
380   // Merge IPI.
381   if (maybeIpi) {
382     if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
383                                   maybeIpi->typeArray()))
384       fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
385     ipiMap = ipiSrc->indexMapStorage;
386   }
387 
388   if (ctx.config.showSummary) {
389     nbTypeRecords = tpiMap.size() + ipiMap.size();
390     nbTypeRecordsBytes =
391         expectedTpi->typeArray().getUnderlyingStream().getLength() +
392         (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
393                   : 0);
394 
395     // Count how many times we saw each type record in our input. If a
396     // destination type index is present in the source to destination type index
397     // map, that means we saw it once in the input. Add it to our histogram.
398     m->tpiCounts.resize(m->getTypeTable().size());
399     m->ipiCounts.resize(m->getIDTable().size());
400     for (TypeIndex ti : tpiMap)
401       if (!ti.isSimple())
402         ++m->tpiCounts[ti.toArrayIndex()];
403     for (TypeIndex ti : ipiMap)
404       if (!ti.isSimple())
405         ++m->ipiCounts[ti.toArrayIndex()];
406   }
407 
408   return Error::success();
409 }
410 
getTypeServerSource()411 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
412   const codeview::GUID &tsId = typeServerDependency.getGuid();
413   StringRef tsPath = typeServerDependency.getName();
414 
415   TypeServerSource *tsSrc = nullptr;
416   auto it = ctx.typeServerSourceMappings.find(tsId);
417   if (it != ctx.typeServerSourceMappings.end()) {
418     tsSrc = (TypeServerSource *)it->second;
419   }
420   if (tsSrc == nullptr) {
421     // The file failed to load, lookup by name
422     PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
423     if (!pdb)
424       return createFileError(tsPath, errorCodeToError(std::error_code(
425                                          ENOENT, std::generic_category())));
426     // If an error occurred during loading, throw it now
427     if (pdb->loadErrorStr)
428       return createFileError(
429           tsPath, make_error<StringError>(*pdb->loadErrorStr,
430                                           llvm::inconvertibleErrorCode()));
431 
432     tsSrc = (TypeServerSource *)pdb->debugTypesObj;
433 
434     // Just because a file with a matching name was found and it was an actual
435     // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
436     // must match the GUID specified in the TypeServer2 record.
437     if (tsSrc->Guid != tsId) {
438       return createFileError(tsPath,
439                              make_error<pdb::PDBError>(
440                                  pdb::pdb_error_code::signature_out_of_date));
441     }
442   }
443   return tsSrc;
444 }
445 
mergeDebugT(TypeMerger * m)446 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
447   Expected<TypeServerSource *> tsSrc = getTypeServerSource();
448   if (!tsSrc)
449     return tsSrc.takeError();
450 
451   pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
452   auto expectedInfo = pdbSession.getPDBInfoStream();
453   if (!expectedInfo)
454     return expectedInfo.takeError();
455 
456   // Reuse the type index map of the type server.
457   tpiMap = (*tsSrc)->tpiMap;
458   ipiMap = (*tsSrc)->ipiMap;
459   return Error::success();
460 }
461 
equalsPath(StringRef path1,StringRef path2)462 static bool equalsPath(StringRef path1, StringRef path2) {
463 #if defined(_WIN32)
464   return path1.equals_insensitive(path2);
465 #else
466   return path1.equals(path2);
467 #endif
468 }
469 
470 // Find by name an OBJ provided on the command line
findObjByName(StringRef fileNameOnly)471 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
472   SmallString<128> currentPath;
473   for (auto kv : ctx.precompSourceMappings) {
474     StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
475                                                     sys::path::Style::windows);
476 
477     // Compare based solely on the file name (link.exe behavior)
478     if (equalsPath(currentFileName, fileNameOnly))
479       return (PrecompSource *)kv.second;
480   }
481   return nullptr;
482 }
483 
findPrecompSource(ObjFile * file,PrecompRecord & pr)484 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
485                                                    PrecompRecord &pr) {
486   // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
487   // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
488   // the paths embedded in the OBJs are in the Windows format.
489   SmallString<128> prFileName =
490       sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
491 
492   auto it = ctx.precompSourceMappings.find(pr.getSignature());
493   if (it != ctx.precompSourceMappings.end()) {
494     return (PrecompSource *)it->second;
495   }
496   // Lookup by name
497   return findObjByName(prFileName);
498 }
499 
findPrecompMap(ObjFile * file,PrecompRecord & pr)500 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
501                                                            PrecompRecord &pr) {
502   PrecompSource *precomp = findPrecompSource(file, pr);
503 
504   if (!precomp)
505     return createFileError(
506         pr.getPrecompFilePath(),
507         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
508 
509   // Don't rely on the PCH signature to validate the concordance between the PCH
510   // and the OBJ that uses it. However we do validate here that the
511   // LF_ENDPRECOMP record index lines up with the number of type records
512   // LF_PRECOMP is expecting.
513   if (precomp->endPrecompIdx != pr.getTypesCount())
514     return createFileError(
515         toString(file),
516         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
517 
518   return precomp;
519 }
520 
521 /// Merges a precompiled headers TPI map into the current TPI map. The
522 /// precompiled headers object will also be loaded and remapped in the
523 /// process.
mergeInPrecompHeaderObj()524 Error UsePrecompSource::mergeInPrecompHeaderObj() {
525   auto e = findPrecompMap(file, precompDependency);
526   if (!e)
527     return e.takeError();
528 
529   PrecompSource *precompSrc = *e;
530   if (precompSrc->tpiMap.empty())
531     return Error::success();
532 
533   assert(precompDependency.getStartTypeIndex() ==
534          TypeIndex::FirstNonSimpleIndex);
535   assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
536   // Use the previously remapped index map from the precompiled headers.
537   indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
538                          precompSrc->tpiMap.begin() +
539                              precompDependency.getTypesCount());
540 
541   return Error::success();
542 }
543 
mergeDebugT(TypeMerger * m)544 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
545   // This object was compiled with /Yu, so process the corresponding
546   // precompiled headers object (/Yc) first. Some type indices in the current
547   // object are referencing data in the precompiled headers object, so we need
548   // both to be loaded.
549   if (Error e = mergeInPrecompHeaderObj())
550     return e;
551 
552   return TpiSource::mergeDebugT(m);
553 }
554 
mergeDebugT(TypeMerger * m)555 Error PrecompSource::mergeDebugT(TypeMerger *m) {
556   // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
557   // The signature comes later with the LF_ENDPRECOMP record, so we first need
558   // to merge in all the .PCH.OBJ file type records, before registering below.
559   if (Error e = TpiSource::mergeDebugT(m))
560     return e;
561 
562   registerMapping();
563 
564   return Error::success();
565 }
566 
registerMapping()567 void PrecompSource::registerMapping() {
568   if (registered)
569     return;
570   if (file->pchSignature && *file->pchSignature) {
571     auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
572     if (!it.second)
573       fatal("a PCH object with the same signature has already been provided (" +
574             toString(it.first->second->file) + " and " + toString(file) + ")");
575     registered = true;
576   }
577 }
578 
579 //===----------------------------------------------------------------------===//
580 // Parellel GHash type merging implementation.
581 //===----------------------------------------------------------------------===//
582 
loadGHashes()583 void TpiSource::loadGHashes() {
584   if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
585     ghashes = getHashesFromDebugH(*debugH);
586     ownedGHashes = false;
587   } else {
588     CVTypeArray types;
589     BinaryStreamReader reader(file->debugTypes, support::little);
590     cantFail(reader.readArray(types, reader.getLength()));
591     assignGHashesFromVector(GloballyHashedType::hashTypes(types));
592   }
593 
594   fillIsItemIndexFromDebugT();
595 }
596 
597 // Copies ghashes from a vector into an array. These are long lived, so it's
598 // worth the time to copy these into an appropriately sized vector to reduce
599 // memory usage.
assignGHashesFromVector(std::vector<GloballyHashedType> && hashVec)600 void TpiSource::assignGHashesFromVector(
601     std::vector<GloballyHashedType> &&hashVec) {
602   if (hashVec.empty())
603     return;
604   GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
605   memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
606   ghashes = ArrayRef(hashes, hashVec.size());
607   ownedGHashes = true;
608 }
609 
610 // Faster way to iterate type records. forEachTypeChecked is faster than
611 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
forEachTypeChecked(ArrayRef<uint8_t> types,function_ref<void (const CVType &)> fn)612 static void forEachTypeChecked(ArrayRef<uint8_t> types,
613                                function_ref<void(const CVType &)> fn) {
614   checkError(
615       forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
616         fn(ty);
617         return Error::success();
618       }));
619 }
620 
621 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
622 // TODO: Store this information in .debug$H so that we don't have to recompute
623 // it. This is the main bottleneck slowing down parallel ghashing with one
624 // thread over single-threaded ghashing.
fillIsItemIndexFromDebugT()625 void TpiSource::fillIsItemIndexFromDebugT() {
626   uint32_t index = 0;
627   isItemIndex.resize(ghashes.size());
628   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
629     if (isIdRecord(ty.kind()))
630       isItemIndex.set(index);
631     ++index;
632   });
633 }
634 
mergeTypeRecord(TypeIndex curIndex,CVType ty)635 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
636   // Decide if the merged type goes into TPI or IPI.
637   bool isItem = isIdRecord(ty.kind());
638   MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
639 
640   // Copy the type into our mutable buffer.
641   assert(ty.length() <= codeview::MaxRecordLength);
642   size_t offset = merged.recs.size();
643   size_t newSize = alignTo(ty.length(), 4);
644   merged.recs.resize(offset + newSize);
645   auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
646   memcpy(newRec.data(), ty.data().data(), newSize);
647 
648   // Fix up the record prefix and padding bytes if it required resizing.
649   if (newSize != ty.length()) {
650     reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
651     for (size_t i = ty.length(); i < newSize; ++i)
652       newRec[i] = LF_PAD0 + (newSize - i);
653   }
654 
655   // Remap the type indices in the new record.
656   remapTypesInTypeRecord(newRec);
657   uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
658   merged.recSizes.push_back(static_cast<uint16_t>(newSize));
659   merged.recHashes.push_back(pdbHash);
660 
661   // Retain a mapping from PDB function id to PDB function type. This mapping is
662   // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
663   // symbols.
664   if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
665     bool success = ty.length() >= 12;
666     TypeIndex funcId = curIndex;
667     if (success)
668       success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
669     TypeIndex funcType =
670         *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
671     if (success) {
672       funcIdToType.push_back({funcId, funcType});
673     } else {
674       StringRef fname = file ? file->getName() : "<unknown PDB>";
675       warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
676            " in " + fname);
677     }
678   }
679 }
680 
mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,TypeIndex beginIndex)681 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
682                                        TypeIndex beginIndex) {
683   // Re-sort the list of unique types by index.
684   if (kind == PDB)
685     assert(llvm::is_sorted(uniqueTypes));
686   else
687     llvm::sort(uniqueTypes);
688 
689   // Accumulate all the unique types into one buffer in mergedTypes.
690   uint32_t ghashIndex = 0;
691   auto nextUniqueIndex = uniqueTypes.begin();
692   assert(mergedTpi.recs.empty());
693   assert(mergedIpi.recs.empty());
694 
695   // Pre-compute the number of elements in advance to avoid std::vector resizes.
696   unsigned nbTpiRecs = 0;
697   unsigned nbIpiRecs = 0;
698   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
699     if (nextUniqueIndex != uniqueTypes.end() &&
700         *nextUniqueIndex == ghashIndex) {
701       assert(ty.length() <= codeview::MaxRecordLength);
702       size_t newSize = alignTo(ty.length(), 4);
703       (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
704       ++nextUniqueIndex;
705     }
706     ++ghashIndex;
707   });
708   mergedTpi.recs.reserve(nbTpiRecs);
709   mergedIpi.recs.reserve(nbIpiRecs);
710 
711   // Do the actual type merge.
712   ghashIndex = 0;
713   nextUniqueIndex = uniqueTypes.begin();
714   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
715     if (nextUniqueIndex != uniqueTypes.end() &&
716         *nextUniqueIndex == ghashIndex) {
717       mergeTypeRecord(beginIndex + ghashIndex, ty);
718       ++nextUniqueIndex;
719     }
720     ++ghashIndex;
721   });
722   assert(nextUniqueIndex == uniqueTypes.end() &&
723          "failed to merge all desired records");
724   assert(uniqueTypes.size() ==
725              mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
726          "missing desired record");
727 }
728 
remapTpiWithGHashes(GHashState * g)729 void TpiSource::remapTpiWithGHashes(GHashState *g) {
730   assert(ctx.config.debugGHashes && "ghashes must be enabled");
731   fillMapFromGHashes(g);
732   tpiMap = indexMapStorage;
733   ipiMap = indexMapStorage;
734   mergeUniqueTypeRecords(file->debugTypes);
735   // TODO: Free all unneeded ghash resources now that we have a full index map.
736 
737   if (ctx.config.showSummary) {
738     nbTypeRecords = ghashes.size();
739     nbTypeRecordsBytes = file->debugTypes.size();
740   }
741 }
742 
743 // PDBs do not actually store global hashes, so when merging a type server
744 // PDB we have to synthesize global hashes.  To do this, we first synthesize
745 // global hashes for the TPI stream, since it is independent, then we
746 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
747 // as inputs.
loadGHashes()748 void TypeServerSource::loadGHashes() {
749   // Don't hash twice.
750   if (!ghashes.empty())
751     return;
752   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
753 
754   // Hash TPI stream.
755   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
756   if (auto e = expectedTpi.takeError())
757     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
758   assignGHashesFromVector(
759       GloballyHashedType::hashTypes(expectedTpi->typeArray()));
760   isItemIndex.resize(ghashes.size());
761 
762   // Hash IPI stream, which depends on TPI ghashes.
763   if (!pdbFile.hasPDBIpiStream())
764     return;
765   Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
766   if (auto e = expectedIpi.takeError())
767     fatal("error retrieving IPI stream: " + toString(std::move(e)));
768   ipiSrc->assignGHashesFromVector(
769       GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
770 
771   // The IPI stream isItemIndex bitvector should be all ones.
772   ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
773   ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
774 }
775 
776 // Flatten discontiguous PDB type arrays to bytes so that we can use
777 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
778 // type servers is faster than iterating all object files compiled with /Z7 with
779 // CVTypeArray, which has high overheads due to the virtual interface of
780 // BinaryStream::readBytes.
typeArrayToBytes(const CVTypeArray & types)781 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
782   BinaryStreamRef stream = types.getUnderlyingStream();
783   ArrayRef<uint8_t> debugTypes;
784   checkError(stream.readBytes(0, stream.getLength(), debugTypes));
785   return debugTypes;
786 }
787 
788 // Merge types from a type server PDB.
remapTpiWithGHashes(GHashState * g)789 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
790   assert(ctx.config.debugGHashes && "ghashes must be enabled");
791 
792   // IPI merging depends on TPI, so do TPI first, then do IPI.  No need to
793   // propagate errors, those should've been handled during ghash loading.
794   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
795   pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
796   fillMapFromGHashes(g);
797   tpiMap = indexMapStorage;
798   mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
799   if (pdbFile.hasPDBIpiStream()) {
800     pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
801     ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
802     ipiSrc->fillMapFromGHashes(g);
803     ipiMap = ipiSrc->indexMapStorage;
804     ipiSrc->tpiMap = tpiMap;
805     ipiSrc->ipiMap = ipiMap;
806     ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
807 
808     if (ctx.config.showSummary) {
809       nbTypeRecords = ipiSrc->ghashes.size();
810       nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
811     }
812   }
813 
814   if (ctx.config.showSummary) {
815     nbTypeRecords += ghashes.size();
816     nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
817   }
818 }
819 
remapTpiWithGHashes(GHashState * g)820 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
821   // No remapping to do with /Zi objects. Simply use the index map from the type
822   // server. Errors should have been reported earlier. Symbols from this object
823   // will be ignored.
824   Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
825   if (!maybeTsSrc) {
826     typeMergingError =
827         joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
828     return;
829   }
830   TypeServerSource *tsSrc = *maybeTsSrc;
831   tpiMap = tsSrc->tpiMap;
832   ipiMap = tsSrc->ipiMap;
833 }
834 
loadGHashes()835 void PrecompSource::loadGHashes() {
836   if (getDebugH(file)) {
837     warn("ignoring .debug$H section; pch with ghash is not implemented");
838   }
839 
840   uint32_t ghashIdx = 0;
841   std::vector<GloballyHashedType> hashVec;
842   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
843     // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
844     // the PDB. There must be an entry in the list of ghashes so that the type
845     // indexes of the following records in the /Yc PCH object line up.
846     if (ty.kind() == LF_ENDPRECOMP) {
847       EndPrecompRecord endPrecomp;
848       cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
849           const_cast<CVType &>(ty), endPrecomp));
850       file->pchSignature = endPrecomp.getSignature();
851       registerMapping();
852       endPrecompIdx = ghashIdx;
853     }
854 
855     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
856     isItemIndex.push_back(isIdRecord(ty.kind()));
857     ++ghashIdx;
858   });
859   assignGHashesFromVector(std::move(hashVec));
860 }
861 
loadGHashes()862 void UsePrecompSource::loadGHashes() {
863   auto e = findPrecompMap(file, precompDependency);
864   if (!e) {
865     warn(toString(e.takeError()));
866     return;
867   }
868 
869   PrecompSource *pchSrc = *e;
870 
871   // To compute ghashes of a /Yu object file, we need to build on the ghashes of
872   // the /Yc PCH object. After we are done hashing, discard the ghashes from the
873   // PCH source so we don't unnecessarily try to deduplicate them.
874   std::vector<GloballyHashedType> hashVec =
875       pchSrc->ghashes.take_front(precompDependency.getTypesCount());
876   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
877     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
878     isItemIndex.push_back(isIdRecord(ty.kind()));
879   });
880   hashVec.erase(hashVec.begin(),
881                 hashVec.begin() + precompDependency.getTypesCount());
882   assignGHashesFromVector(std::move(hashVec));
883 }
884 
remapTpiWithGHashes(GHashState * g)885 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
886   fillMapFromGHashes(g);
887   // This object was compiled with /Yu, so process the corresponding
888   // precompiled headers object (/Yc) first. Some type indices in the current
889   // object are referencing data in the precompiled headers object, so we need
890   // both to be loaded.
891   if (Error e = mergeInPrecompHeaderObj()) {
892     typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
893     return;
894   }
895 
896   tpiMap = indexMapStorage;
897   ipiMap = indexMapStorage;
898   mergeUniqueTypeRecords(file->debugTypes,
899                          TypeIndex(precompDependency.getStartTypeIndex() +
900                                    precompDependency.getTypesCount()));
901   if (ctx.config.showSummary) {
902     nbTypeRecords = ghashes.size();
903     nbTypeRecordsBytes = file->debugTypes.size();
904   }
905 }
906 
907 namespace {
908 /// A concurrent hash table for global type hashing. It is based on this paper:
909 /// Concurrent Hash Tables: Fast and General(?)!
910 /// https://dl.acm.org/doi/10.1145/3309206
911 ///
912 /// This hash table is meant to be used in two phases:
913 /// 1. concurrent insertions
914 /// 2. concurrent reads
915 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
916 ///
917 /// The paper describes storing a key-value pair in two machine words.
918 /// Generally, the values stored in this map are type indices, and we can use
919 /// those values to recover the ghash key from a side table. This allows us to
920 /// shrink the table entries further at the cost of some loads, and sidesteps
921 /// the need for a 128 bit atomic compare-and-swap operation.
922 ///
923 /// During insertion, a priority function is used to decide which insertion
924 /// should be preferred. This ensures that the output is deterministic. For
925 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
926 ///
927 class GHashCell;
928 struct GHashTable {
929   GHashCell *table = nullptr;
930   uint32_t tableSize = 0;
931 
932   GHashTable() = default;
933   ~GHashTable();
934 
935   /// Initialize the table with the given size. Because the table cannot be
936   /// resized, the initial size of the table must be large enough to contain all
937   /// inputs, or insertion may not be able to find an empty cell.
938   void init(uint32_t newTableSize);
939 
940   /// Insert the cell with the given ghash into the table. Return the insertion
941   /// position in the table. It is safe for the caller to store the insertion
942   /// position because the table cannot be resized.
943   uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
944                   GHashCell newCell);
945 };
946 
947 /// A ghash table cell for deduplicating types from TpiSources.
948 class GHashCell {
949   // Force "data" to be 64-bit aligned; otherwise, some versions of clang
950   // will generate calls to libatomic when using some versions of libstdc++
951   // on 32-bit targets.  (Also, in theory, there could be a target where
952   // new[] doesn't always return an 8-byte-aligned allocation.)
953   alignas(sizeof(uint64_t)) uint64_t data = 0;
954 
955 public:
956   GHashCell() = default;
957 
958   // Construct data most to least significant so that sorting works well:
959   // - isItem
960   // - tpiSrcIdx
961   // - ghashIdx
962   // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
963   // non-zero representation.
GHashCell(bool isItem,uint32_t tpiSrcIdx,uint32_t ghashIdx)964   GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
965       : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
966              ghashIdx) {
967     assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
968     assert(ghashIdx == getGHashIdx() && "round trip failure");
969   }
970 
GHashCell(uint64_t data)971   explicit GHashCell(uint64_t data) : data(data) {}
972 
973   // The empty cell is all zeros.
isEmpty() const974   bool isEmpty() const { return data == 0ULL; }
975 
976   /// Extract the tpiSrcIdx.
getTpiSrcIdx() const977   uint32_t getTpiSrcIdx() const {
978     return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
979   }
980 
981   /// Extract the index into the ghash array of the TpiSource.
getGHashIdx() const982   uint32_t getGHashIdx() const { return (uint32_t)data; }
983 
isItem() const984   bool isItem() const { return data & (1ULL << 63U); }
985 
986   /// Get the ghash key for this cell.
getGHash(const COFFLinkerContext & ctx) const987   GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
988     return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
989   }
990 
991   /// The priority function for the cell. The data is stored such that lower
992   /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
993   /// from earlier sources are more likely to prevail.
operator <(const GHashCell & l,const GHashCell & r)994   friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
995     return l.data < r.data;
996   }
997 };
998 } // namespace
999 
1000 namespace lld::coff {
1001 /// This type is just a wrapper around GHashTable with external linkage so it
1002 /// can be used from a header.
1003 struct GHashState {
1004   GHashTable table;
1005 };
1006 } // namespace lld::coff
1007 
~GHashTable()1008 GHashTable::~GHashTable() { delete[] table; }
1009 
init(uint32_t newTableSize)1010 void GHashTable::init(uint32_t newTableSize) {
1011   table = new GHashCell[newTableSize];
1012   memset(table, 0, newTableSize * sizeof(GHashCell));
1013   tableSize = newTableSize;
1014 }
1015 
insert(COFFLinkerContext & ctx,GloballyHashedType ghash,GHashCell newCell)1016 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1017                             GHashCell newCell) {
1018   assert(!newCell.isEmpty() && "cannot insert empty cell value");
1019 
1020   // FIXME: The low bytes of SHA1 have low entropy for short records, which
1021   // type records are. Swap the byte order for better entropy. A better ghash
1022   // won't need this.
1023   uint32_t startIdx =
1024       ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
1025 
1026   // Do a linear probe starting at startIdx.
1027   uint32_t idx = startIdx;
1028   while (true) {
1029     // Run a compare and swap loop. There are four cases:
1030     // - cell is empty: CAS into place and return
1031     // - cell has matching key, earlier priority: do nothing, return
1032     // - cell has matching key, later priority: CAS into place and return
1033     // - cell has non-matching key: hash collision, probe next cell
1034     auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
1035     GHashCell oldCell(cellPtr->load());
1036     while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
1037       // Check if there is an existing ghash entry with a higher priority
1038       // (earlier ordering). If so, this is a duplicate, we are done.
1039       if (!oldCell.isEmpty() && oldCell < newCell)
1040         return idx;
1041       // Either the cell is empty, or our value is higher priority. Try to
1042       // compare and swap. If it succeeds, we are done.
1043       if (cellPtr->compare_exchange_weak(oldCell, newCell))
1044         return idx;
1045       // If the CAS failed, check this cell again.
1046     }
1047 
1048     // Advance the probe. Wrap around to the beginning if we run off the end.
1049     ++idx;
1050     idx = idx == tableSize ? 0 : idx;
1051     if (idx == startIdx) {
1052       // If this becomes an issue, we could mark failure and rehash from the
1053       // beginning with a bigger table. There is no difference between rehashing
1054       // internally and starting over.
1055       report_fatal_error("ghash table is full");
1056     }
1057   }
1058   llvm_unreachable("left infloop");
1059 }
1060 
TypeMerger(COFFLinkerContext & c,llvm::BumpPtrAllocator & alloc)1061 TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1062     : typeTable(alloc), idTable(alloc), ctx(c) {}
1063 
1064 TypeMerger::~TypeMerger() = default;
1065 
mergeTypesWithGHash()1066 void TypeMerger::mergeTypesWithGHash() {
1067   // Load ghashes. Do type servers and PCH objects first.
1068   {
1069     ScopedTimer t1(ctx.loadGHashTimer);
1070     parallelForEach(dependencySources,
1071                     [&](TpiSource *source) { source->loadGHashes(); });
1072     parallelForEach(objectSources,
1073                     [&](TpiSource *source) { source->loadGHashes(); });
1074   }
1075 
1076   ScopedTimer t2(ctx.mergeGHashTimer);
1077   GHashState ghashState;
1078 
1079   // Estimate the size of hash table needed to deduplicate ghashes. This *must*
1080   // be larger than the number of unique types, or hash table insertion may not
1081   // be able to find a vacant slot. Summing the input types guarantees this, but
1082   // it is a gross overestimate. The table size could be reduced to save memory,
1083   // but it would require implementing rehashing, and this table is generally
1084   // small compared to total memory usage, at eight bytes per input type record,
1085   // and most input type records are larger than eight bytes.
1086   size_t tableSize = 0;
1087   for (TpiSource *source : ctx.tpiSourceList)
1088     tableSize += source->ghashes.size();
1089 
1090   // Cap the table size so that we can use 32-bit cell indices. Type indices are
1091   // also 32-bit, so this is an inherent PDB file format limit anyway.
1092   tableSize =
1093       std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1094   ghashState.table.init(static_cast<uint32_t>(tableSize));
1095 
1096   // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1097   // the contents of the hash table cell, but we can remember the insertion
1098   // position. Because the table does not rehash, the position will not change
1099   // under insertion. After insertion is done, the value of the cell can be read
1100   // to retrieve the final PDB type index.
1101   parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
1102     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1103     source->indexMapStorage.resize(source->ghashes.size());
1104     for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1105       if (source->shouldOmitFromPdb(i)) {
1106         source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1107         continue;
1108       }
1109       GloballyHashedType ghash = source->ghashes[i];
1110       bool isItem = source->isItemIndex.test(i);
1111       uint32_t cellIdx =
1112           ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
1113 
1114       // Store the ghash cell index as a type index in indexMapStorage. Later
1115       // we will replace it with the PDB type index.
1116       source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1117     }
1118   });
1119 
1120   // Collect all non-empty cells and sort them. This will implicitly assign
1121   // destination type indices, and partition the entries into type records and
1122   // item records. It arranges types in this order:
1123   // - type records
1124   //   - source 0, type 0...
1125   //   - source 1, type 1...
1126   // - item records
1127   //   - source 0, type 1...
1128   //   - source 1, type 0...
1129   std::vector<GHashCell> entries;
1130   for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
1131     if (!cell.isEmpty())
1132       entries.push_back(cell);
1133   }
1134   parallelSort(entries, std::less<GHashCell>());
1135   log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1136               tableSize ? double(entries.size()) / tableSize : 0,
1137               entries.size(), tableSize));
1138 
1139   // Find out how many type and item indices there are.
1140   auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
1141   assert((mid == entries.end() || mid->isItem()) &&
1142          (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1143          "midpoint is not midpoint");
1144   uint32_t numTypes = std::distance(entries.begin(), mid);
1145   uint32_t numItems = std::distance(mid, entries.end());
1146   log("Tpi record count: " + Twine(numTypes));
1147   log("Ipi record count: " + Twine(numItems));
1148 
1149   // Make a list of the "unique" type records to merge for each tpi source. Type
1150   // merging will skip indices not on this list. Store the destination PDB type
1151   // index for these unique types in the tpiMap for each source. The entries for
1152   // non-unique types will be filled in prior to type merging.
1153   for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1154     auto &cell = entries[i];
1155     uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1156     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1157     source->uniqueTypes.push_back(cell.getGHashIdx());
1158 
1159     // Update the ghash table to store the destination PDB type index in the
1160     // table.
1161     uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1162     uint32_t ghashCellIndex =
1163         source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1164     ghashState.table.table[ghashCellIndex] =
1165         GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1166   }
1167 
1168   // In parallel, remap all types.
1169   for (TpiSource *source : dependencySources)
1170     source->remapTpiWithGHashes(&ghashState);
1171   parallelForEach(objectSources, [&](TpiSource *source) {
1172     source->remapTpiWithGHashes(&ghashState);
1173   });
1174 
1175   // Build a global map of from function ID to function type.
1176   for (TpiSource *source : ctx.tpiSourceList) {
1177     for (auto idToType : source->funcIdToType)
1178       funcIdToType.insert(idToType);
1179     source->funcIdToType.clear();
1180   }
1181 
1182   clearGHashes();
1183 }
1184 
sortDependencies()1185 void TypeMerger::sortDependencies() {
1186   // Order dependencies first, but preserve the existing order.
1187   std::vector<TpiSource *> deps;
1188   std::vector<TpiSource *> objs;
1189   for (TpiSource *s : ctx.tpiSourceList)
1190     (s->isDependency() ? deps : objs).push_back(s);
1191   uint32_t numDeps = deps.size();
1192   uint32_t numObjs = objs.size();
1193   ctx.tpiSourceList = std::move(deps);
1194   ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1195   for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1196     ctx.tpiSourceList[i]->tpiSrcIdx = i;
1197   dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1198   objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1199 }
1200 
1201 /// Given the index into the ghash table for a particular type, return the type
1202 /// index for that type in the output PDB.
loadPdbTypeIndexFromCell(GHashState * g,uint32_t ghashCellIdx)1203 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1204                                           uint32_t ghashCellIdx) {
1205   GHashCell cell = g->table.table[ghashCellIdx];
1206   return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1207 }
1208 
1209 /// Free heap allocated ghashes.
clearGHashes()1210 void TypeMerger::clearGHashes() {
1211   for (TpiSource *src : ctx.tpiSourceList) {
1212     if (src->ownedGHashes)
1213       delete[] src->ghashes.data();
1214     src->ghashes = {};
1215     src->isItemIndex.clear();
1216     src->uniqueTypes.clear();
1217   }
1218 }
1219 
1220 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1221 // ghash to lookup its final type index in the PDB, and store that in the map.
fillMapFromGHashes(GHashState * g)1222 void TpiSource::fillMapFromGHashes(GHashState *g) {
1223   for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1224     TypeIndex fakeCellIndex = indexMapStorage[i];
1225     if (fakeCellIndex.isSimple())
1226       indexMapStorage[i] = fakeCellIndex;
1227     else
1228       indexMapStorage[i] =
1229           loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1230   }
1231 }
1232