1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DebugTypes.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Driver.h"
13 #include "InputFiles.h"
14 #include "PDB.h"
15 #include "TypeMerger.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22 #include "llvm/DebugInfo/PDB/GenericError.h"
23 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/Parallel.h"
30 #include "llvm/Support/Path.h"
31 
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace lld;
35 using namespace lld::coff;
36 
37 namespace {
38 class TypeServerIpiSource;
39 
40 // The TypeServerSource class represents a PDB type server, a file referenced by
41 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
42 // files, therefore there must be only once instance per OBJ lot. The file path
43 // is discovered from the dependent OBJ's debug type stream. The
44 // TypeServerSource object is then queued and loaded by the COFF Driver. The
45 // debug type stream for such PDB files will be merged first in the final PDB,
46 // before any dependent OBJ.
47 class TypeServerSource : public TpiSource {
48 public:
49   explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
50       : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
51     if (f->loadErr && *f->loadErr)
52       return;
53     pdb::PDBFile &file = f->session->getPDBFile();
54     auto expectedInfo = file.getPDBInfoStream();
55     if (!expectedInfo)
56       return;
57     Guid = expectedInfo->getGuid();
58     auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
59     if (!it.second) {
60       // If we hit here we have collision on Guid's in two PDB files.
61       // This can happen if the PDB Guid is invalid or if we are really
62       // unlucky. This should fall back on stright file-system lookup.
63       it.first->second = nullptr;
64     }
65   }
66 
67   Error mergeDebugT(TypeMerger *m) override;
68 
69   void loadGHashes() override;
70   void remapTpiWithGHashes(GHashState *g) override;
71 
72   bool isDependency() const override { return true; }
73 
74   PDBInputFile *pdbInputFile = nullptr;
75 
76   // TpiSource for IPI stream.
77   TypeServerIpiSource *ipiSrc = nullptr;
78 
79   // The PDB signature GUID.
80   codeview::GUID Guid;
81 };
82 
83 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
84 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
85 // invariant of one type index space per source.
86 class TypeServerIpiSource : public TpiSource {
87 public:
88   explicit TypeServerIpiSource(COFFLinkerContext &ctx)
89       : TpiSource(ctx, PDBIpi, nullptr) {}
90 
91   friend class TypeServerSource;
92 
93   // All of the TpiSource methods are no-ops. The parent TypeServerSource
94   // handles both TPI and IPI.
95   Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
96   void loadGHashes() override {}
97   void remapTpiWithGHashes(GHashState *g) override {}
98   bool isDependency() const override { return true; }
99 };
100 
101 // This class represents the debug type stream of an OBJ file that depends on a
102 // PDB type server (see TypeServerSource).
103 class UseTypeServerSource : public TpiSource {
104   Expected<TypeServerSource *> getTypeServerSource();
105 
106 public:
107   UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
108       : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
109 
110   Error mergeDebugT(TypeMerger *m) override;
111 
112   // No need to load ghashes from /Zi objects.
113   void loadGHashes() override {}
114   void remapTpiWithGHashes(GHashState *g) override;
115 
116   // Information about the PDB type server dependency, that needs to be loaded
117   // in before merging this OBJ.
118   TypeServer2Record typeServerDependency;
119 };
120 
121 // This class represents the debug type stream of a Microsoft precompiled
122 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
123 // PDB, before any other OBJs that depend on this. Note that only MSVC generate
124 // such files, clang does not.
125 class PrecompSource : public TpiSource {
126 public:
127   PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
128     if (!f->pchSignature || !*f->pchSignature)
129       fatal(toString(f) +
130             " claims to be a PCH object, but does not have a valid signature");
131     auto it = ctx.precompSourceMappings.emplace(*f->pchSignature, this);
132     if (!it.second)
133       fatal("a PCH object with the same signature has already been provided (" +
134             toString(it.first->second->file) + " and " + toString(file) + ")");
135   }
136 
137   void loadGHashes() override;
138 
139   bool isDependency() const override { return true; }
140 };
141 
142 // This class represents the debug type stream of an OBJ file that depends on a
143 // Microsoft precompiled headers OBJ (see PrecompSource).
144 class UsePrecompSource : public TpiSource {
145 public:
146   UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
147       : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
148 
149   Error mergeDebugT(TypeMerger *m) override;
150 
151   void loadGHashes() override;
152   void remapTpiWithGHashes(GHashState *g) override;
153 
154 private:
155   Error mergeInPrecompHeaderObj();
156 
157   PrecompSource *findObjByName(StringRef fileNameOnly);
158   PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
159   Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
160 
161 public:
162   // Information about the Precomp OBJ dependency, that needs to be loaded in
163   // before merging this OBJ.
164   PrecompRecord precompDependency;
165 };
166 } // namespace
167 
168 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
169     : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
170   ctx.addTpiSource(this);
171 }
172 
173 // Vtable key method.
174 TpiSource::~TpiSource() {
175   // Silence any assertions about unchecked errors.
176   consumeError(std::move(typeMergingError));
177 }
178 
179 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
180   return make<TpiSource>(ctx, TpiSource::Regular, file);
181 }
182 
183 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
184                                            PDBInputFile *pdbInputFile) {
185   // Type server sources come in pairs: the TPI stream, and the IPI stream.
186   auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
187   if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
188     tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
189   return tpiSource;
190 }
191 
192 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
193                                               ObjFile *file,
194                                               TypeServer2Record ts) {
195   return make<UseTypeServerSource>(ctx, file, ts);
196 }
197 
198 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
199   return make<PrecompSource>(ctx, file);
200 }
201 
202 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
203                                            ObjFile *file,
204                                            PrecompRecord precomp) {
205   return make<UsePrecompSource>(ctx, file, precomp);
206 }
207 
208 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
209   if (ti.isSimple())
210     return true;
211 
212   // This can be an item index or a type index. Choose the appropriate map.
213   ArrayRef<TypeIndex> tpiOrIpiMap =
214       (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
215   if (ti.toArrayIndex() >= tpiOrIpiMap.size())
216     return false;
217   ti = tpiOrIpiMap[ti.toArrayIndex()];
218   return true;
219 }
220 
221 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
222                             ArrayRef<TiReference> typeRefs) {
223   MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
224   for (const TiReference &ref : typeRefs) {
225     unsigned byteSize = ref.Count * sizeof(TypeIndex);
226     if (contents.size() < ref.Offset + byteSize)
227       fatal("symbol record too short");
228 
229     MutableArrayRef<TypeIndex> indices(
230         reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
231     for (TypeIndex &ti : indices) {
232       if (!remapTypeIndex(ti, ref.Kind)) {
233         if (config->verbose) {
234           uint16_t kind =
235               reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
236           StringRef fname = file ? file->getName() : "<unknown PDB>";
237           log("failed to remap type index in record of kind 0x" +
238               utohexstr(kind) + " in " + fname + " with bad " +
239               (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
240               " index 0x" + utohexstr(ti.getIndex()));
241         }
242         ti = TypeIndex(SimpleTypeKind::NotTranslated);
243         continue;
244       }
245     }
246   }
247 }
248 
249 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
250   // TODO: Handle errors similar to symbols.
251   SmallVector<TiReference, 32> typeRefs;
252   discoverTypeIndices(CVType(rec), typeRefs);
253   remapRecord(rec, typeRefs);
254 }
255 
256 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
257   // Discover type index references in the record. Skip it if we don't
258   // know where they are.
259   SmallVector<TiReference, 32> typeRefs;
260   if (!discoverTypeIndicesInSymbol(rec, typeRefs))
261     return false;
262   remapRecord(rec, typeRefs);
263   return true;
264 }
265 
266 // A COFF .debug$H section is currently a clang extension.  This function checks
267 // if a .debug$H section is in a format that we expect / understand, so that we
268 // can ignore any sections which are coincidentally also named .debug$H but do
269 // not contain a format we recognize.
270 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
271   if (debugH.size() < sizeof(object::debug_h_header))
272     return false;
273   auto *header =
274       reinterpret_cast<const object::debug_h_header *>(debugH.data());
275   debugH = debugH.drop_front(sizeof(object::debug_h_header));
276   return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
277          header->Version == 0 &&
278          header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1_8) &&
279          (debugH.size() % 8 == 0);
280 }
281 
282 static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
283   SectionChunk *sec =
284       SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
285   if (!sec)
286     return llvm::None;
287   ArrayRef<uint8_t> contents = sec->getContents();
288   if (!canUseDebugH(contents))
289     return None;
290   return contents;
291 }
292 
293 static ArrayRef<GloballyHashedType>
294 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
295   assert(canUseDebugH(debugH));
296   debugH = debugH.drop_front(sizeof(object::debug_h_header));
297   uint32_t count = debugH.size() / sizeof(GloballyHashedType);
298   return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
299 }
300 
301 // Merge .debug$T for a generic object file.
302 Error TpiSource::mergeDebugT(TypeMerger *m) {
303   assert(!config->debugGHashes &&
304          "use remapTpiWithGHashes when ghash is enabled");
305 
306   CVTypeArray types;
307   BinaryStreamReader reader(file->debugTypes, support::little);
308   cantFail(reader.readArray(types, reader.getLength()));
309 
310   // When dealing with PCH.OBJ, some indices were already merged.
311   unsigned nbHeadIndices = indexMapStorage.size();
312 
313   if (auto err = mergeTypeAndIdRecords(
314           m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature))
315     fatal("codeview::mergeTypeAndIdRecords failed: " +
316           toString(std::move(err)));
317 
318   // In an object, there is only one mapping for both types and items.
319   tpiMap = indexMapStorage;
320   ipiMap = indexMapStorage;
321 
322   if (config->showSummary) {
323     nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
324     nbTypeRecordsBytes = reader.getLength();
325     // Count how many times we saw each type record in our input. This
326     // calculation requires a second pass over the type records to classify each
327     // record as a type or index. This is slow, but this code executes when
328     // collecting statistics.
329     m->tpiCounts.resize(m->getTypeTable().size());
330     m->ipiCounts.resize(m->getIDTable().size());
331     uint32_t srcIdx = nbHeadIndices;
332     for (const CVType &ty : types) {
333       TypeIndex dstIdx = tpiMap[srcIdx++];
334       // Type merging may fail, so a complex source type may become the simple
335       // NotTranslated type, which cannot be used as an array index.
336       if (dstIdx.isSimple())
337         continue;
338       SmallVectorImpl<uint32_t> &counts =
339           isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
340       ++counts[dstIdx.toArrayIndex()];
341     }
342   }
343 
344   return Error::success();
345 }
346 
347 // Merge types from a type server PDB.
348 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
349   assert(!config->debugGHashes &&
350          "use remapTpiWithGHashes when ghash is enabled");
351 
352   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
353   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
354   if (auto e = expectedTpi.takeError())
355     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
356   pdb::TpiStream *maybeIpi = nullptr;
357   if (pdbFile.hasPDBIpiStream()) {
358     Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
359     if (auto e = expectedIpi.takeError())
360       fatal("Error getting type server IPI stream: " + toString(std::move(e)));
361     maybeIpi = &*expectedIpi;
362   }
363 
364   // Merge TPI first, because the IPI stream will reference type indices.
365   if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
366                                   expectedTpi->typeArray()))
367     fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
368   tpiMap = indexMapStorage;
369 
370   // Merge IPI.
371   if (maybeIpi) {
372     if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
373                                   maybeIpi->typeArray()))
374       fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
375     ipiMap = ipiSrc->indexMapStorage;
376   }
377 
378   if (config->showSummary) {
379     nbTypeRecords = tpiMap.size() + ipiMap.size();
380     nbTypeRecordsBytes =
381         expectedTpi->typeArray().getUnderlyingStream().getLength() +
382         (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
383                   : 0);
384 
385     // Count how many times we saw each type record in our input. If a
386     // destination type index is present in the source to destination type index
387     // map, that means we saw it once in the input. Add it to our histogram.
388     m->tpiCounts.resize(m->getTypeTable().size());
389     m->ipiCounts.resize(m->getIDTable().size());
390     for (TypeIndex ti : tpiMap)
391       if (!ti.isSimple())
392         ++m->tpiCounts[ti.toArrayIndex()];
393     for (TypeIndex ti : ipiMap)
394       if (!ti.isSimple())
395         ++m->ipiCounts[ti.toArrayIndex()];
396   }
397 
398   return Error::success();
399 }
400 
401 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
402   const codeview::GUID &tsId = typeServerDependency.getGuid();
403   StringRef tsPath = typeServerDependency.getName();
404 
405   TypeServerSource *tsSrc = nullptr;
406   auto it = ctx.typeServerSourceMappings.find(tsId);
407   if (it != ctx.typeServerSourceMappings.end()) {
408     tsSrc = (TypeServerSource *)it->second;
409   }
410   if (tsSrc == nullptr) {
411     // The file failed to load, lookup by name
412     PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
413     if (!pdb)
414       return createFileError(tsPath, errorCodeToError(std::error_code(
415                                          ENOENT, std::generic_category())));
416     // If an error occurred during loading, throw it now
417     if (pdb->loadErr && *pdb->loadErr)
418       return createFileError(tsPath, std::move(*pdb->loadErr));
419 
420     tsSrc = (TypeServerSource *)pdb->debugTypesObj;
421 
422     // Just because a file with a matching name was found and it was an actual
423     // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
424     // must match the GUID specified in the TypeServer2 record.
425     if (tsSrc->Guid != tsId) {
426       return createFileError(tsPath,
427                              make_error<pdb::PDBError>(
428                                  pdb::pdb_error_code::signature_out_of_date));
429     }
430   }
431   return tsSrc;
432 }
433 
434 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
435   Expected<TypeServerSource *> tsSrc = getTypeServerSource();
436   if (!tsSrc)
437     return tsSrc.takeError();
438 
439   pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
440   auto expectedInfo = pdbSession.getPDBInfoStream();
441   if (!expectedInfo)
442     return expectedInfo.takeError();
443 
444   // Reuse the type index map of the type server.
445   tpiMap = (*tsSrc)->tpiMap;
446   ipiMap = (*tsSrc)->ipiMap;
447   return Error::success();
448 }
449 
450 static bool equalsPath(StringRef path1, StringRef path2) {
451 #if defined(_WIN32)
452   return path1.equals_insensitive(path2);
453 #else
454   return path1.equals(path2);
455 #endif
456 }
457 
458 // Find by name an OBJ provided on the command line
459 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
460   SmallString<128> currentPath;
461   for (auto kv : ctx.precompSourceMappings) {
462     StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
463                                                     sys::path::Style::windows);
464 
465     // Compare based solely on the file name (link.exe behavior)
466     if (equalsPath(currentFileName, fileNameOnly))
467       return (PrecompSource *)kv.second;
468   }
469   return nullptr;
470 }
471 
472 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
473                                                    PrecompRecord &pr) {
474   // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
475   // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
476   // the paths embedded in the OBJs are in the Windows format.
477   SmallString<128> prFileName =
478       sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
479 
480   auto it = ctx.precompSourceMappings.find(pr.getSignature());
481   if (it != ctx.precompSourceMappings.end()) {
482     return (PrecompSource *)it->second;
483   }
484   // Lookup by name
485   return findObjByName(prFileName);
486 }
487 
488 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
489                                                            PrecompRecord &pr) {
490   PrecompSource *precomp = findPrecompSource(file, pr);
491 
492   if (!precomp)
493     return createFileError(
494         pr.getPrecompFilePath(),
495         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
496 
497   if (pr.getSignature() != file->pchSignature)
498     return createFileError(
499         toString(file),
500         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
501 
502   if (pr.getSignature() != *precomp->file->pchSignature)
503     return createFileError(
504         toString(precomp->file),
505         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
506 
507   return precomp;
508 }
509 
510 /// Merges a precompiled headers TPI map into the current TPI map. The
511 /// precompiled headers object will also be loaded and remapped in the
512 /// process.
513 Error UsePrecompSource::mergeInPrecompHeaderObj() {
514   auto e = findPrecompMap(file, precompDependency);
515   if (!e)
516     return e.takeError();
517 
518   PrecompSource *precompSrc = *e;
519   if (precompSrc->tpiMap.empty())
520     return Error::success();
521 
522   assert(precompDependency.getStartTypeIndex() ==
523          TypeIndex::FirstNonSimpleIndex);
524   assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
525   // Use the previously remapped index map from the precompiled headers.
526   indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
527                          precompSrc->tpiMap.begin() +
528                              precompDependency.getTypesCount());
529 
530   return Error::success();
531 }
532 
533 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
534   // This object was compiled with /Yu, so process the corresponding
535   // precompiled headers object (/Yc) first. Some type indices in the current
536   // object are referencing data in the precompiled headers object, so we need
537   // both to be loaded.
538   if (Error e = mergeInPrecompHeaderObj())
539     return e;
540 
541   return TpiSource::mergeDebugT(m);
542 }
543 
544 //===----------------------------------------------------------------------===//
545 // Parellel GHash type merging implementation.
546 //===----------------------------------------------------------------------===//
547 
548 void TpiSource::loadGHashes() {
549   if (Optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
550     ghashes = getHashesFromDebugH(*debugH);
551     ownedGHashes = false;
552   } else {
553     CVTypeArray types;
554     BinaryStreamReader reader(file->debugTypes, support::little);
555     cantFail(reader.readArray(types, reader.getLength()));
556     assignGHashesFromVector(GloballyHashedType::hashTypes(types));
557   }
558 
559   fillIsItemIndexFromDebugT();
560 }
561 
562 // Copies ghashes from a vector into an array. These are long lived, so it's
563 // worth the time to copy these into an appropriately sized vector to reduce
564 // memory usage.
565 void TpiSource::assignGHashesFromVector(
566     std::vector<GloballyHashedType> &&hashVec) {
567   if (hashVec.empty())
568     return;
569   GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
570   memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
571   ghashes = makeArrayRef(hashes, hashVec.size());
572   ownedGHashes = true;
573 }
574 
575 // Faster way to iterate type records. forEachTypeChecked is faster than
576 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
577 static void forEachTypeChecked(ArrayRef<uint8_t> types,
578                                function_ref<void(const CVType &)> fn) {
579   checkError(
580       forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
581         fn(ty);
582         return Error::success();
583       }));
584 }
585 
586 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
587 // TODO: Store this information in .debug$H so that we don't have to recompute
588 // it. This is the main bottleneck slowing down parallel ghashing with one
589 // thread over single-threaded ghashing.
590 void TpiSource::fillIsItemIndexFromDebugT() {
591   uint32_t index = 0;
592   isItemIndex.resize(ghashes.size());
593   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
594     if (isIdRecord(ty.kind()))
595       isItemIndex.set(index);
596     ++index;
597   });
598 }
599 
600 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
601   // Decide if the merged type goes into TPI or IPI.
602   bool isItem = isIdRecord(ty.kind());
603   MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
604 
605   // Copy the type into our mutable buffer.
606   assert(ty.length() <= codeview::MaxRecordLength);
607   size_t offset = merged.recs.size();
608   size_t newSize = alignTo(ty.length(), 4);
609   merged.recs.resize(offset + newSize);
610   auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize);
611   memcpy(newRec.data(), ty.data().data(), newSize);
612 
613   // Fix up the record prefix and padding bytes if it required resizing.
614   if (newSize != ty.length()) {
615     reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
616     for (size_t i = ty.length(); i < newSize; ++i)
617       newRec[i] = LF_PAD0 + (newSize - i);
618   }
619 
620   // Remap the type indices in the new record.
621   remapTypesInTypeRecord(newRec);
622   uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
623   merged.recSizes.push_back(static_cast<uint16_t>(newSize));
624   merged.recHashes.push_back(pdbHash);
625 
626   // Retain a mapping from PDB function id to PDB function type. This mapping is
627   // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
628   // symbols.
629   if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
630     bool success = ty.length() >= 12;
631     TypeIndex funcId = curIndex;
632     if (success)
633       success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
634     TypeIndex funcType =
635         *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
636     if (success) {
637       funcIdToType.push_back({funcId, funcType});
638     } else {
639       StringRef fname = file ? file->getName() : "<unknown PDB>";
640       warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
641            " in " + fname);
642     }
643   }
644 }
645 
646 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
647                                        TypeIndex beginIndex) {
648   // Re-sort the list of unique types by index.
649   if (kind == PDB)
650     assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end()));
651   else
652     llvm::sort(uniqueTypes);
653 
654   // Accumulate all the unique types into one buffer in mergedTypes.
655   uint32_t ghashIndex = 0;
656   auto nextUniqueIndex = uniqueTypes.begin();
657   assert(mergedTpi.recs.empty());
658   assert(mergedIpi.recs.empty());
659 
660   // Pre-compute the number of elements in advance to avoid std::vector resizes.
661   unsigned nbTpiRecs = 0;
662   unsigned nbIpiRecs = 0;
663   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
664     if (nextUniqueIndex != uniqueTypes.end() &&
665         *nextUniqueIndex == ghashIndex) {
666       assert(ty.length() <= codeview::MaxRecordLength);
667       size_t newSize = alignTo(ty.length(), 4);
668       (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
669       ++nextUniqueIndex;
670     }
671     ++ghashIndex;
672   });
673   mergedTpi.recs.reserve(nbTpiRecs);
674   mergedIpi.recs.reserve(nbIpiRecs);
675 
676   // Do the actual type merge.
677   ghashIndex = 0;
678   nextUniqueIndex = uniqueTypes.begin();
679   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
680     if (nextUniqueIndex != uniqueTypes.end() &&
681         *nextUniqueIndex == ghashIndex) {
682       mergeTypeRecord(beginIndex + ghashIndex, ty);
683       ++nextUniqueIndex;
684     }
685     ++ghashIndex;
686   });
687   assert(nextUniqueIndex == uniqueTypes.end() &&
688          "failed to merge all desired records");
689   assert(uniqueTypes.size() ==
690              mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
691          "missing desired record");
692 }
693 
694 void TpiSource::remapTpiWithGHashes(GHashState *g) {
695   assert(config->debugGHashes && "ghashes must be enabled");
696   fillMapFromGHashes(g);
697   tpiMap = indexMapStorage;
698   ipiMap = indexMapStorage;
699   mergeUniqueTypeRecords(file->debugTypes);
700   // TODO: Free all unneeded ghash resources now that we have a full index map.
701 
702   if (config->showSummary) {
703     nbTypeRecords = ghashes.size();
704     nbTypeRecordsBytes = file->debugTypes.size();
705   }
706 }
707 
708 // PDBs do not actually store global hashes, so when merging a type server
709 // PDB we have to synthesize global hashes.  To do this, we first synthesize
710 // global hashes for the TPI stream, since it is independent, then we
711 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
712 // as inputs.
713 void TypeServerSource::loadGHashes() {
714   // Don't hash twice.
715   if (!ghashes.empty())
716     return;
717   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
718 
719   // Hash TPI stream.
720   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
721   if (auto e = expectedTpi.takeError())
722     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
723   assignGHashesFromVector(
724       GloballyHashedType::hashTypes(expectedTpi->typeArray()));
725   isItemIndex.resize(ghashes.size());
726 
727   // Hash IPI stream, which depends on TPI ghashes.
728   if (!pdbFile.hasPDBIpiStream())
729     return;
730   Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
731   if (auto e = expectedIpi.takeError())
732     fatal("error retrieving IPI stream: " + toString(std::move(e)));
733   ipiSrc->assignGHashesFromVector(
734       GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
735 
736   // The IPI stream isItemIndex bitvector should be all ones.
737   ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
738   ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
739 }
740 
741 // Flatten discontiguous PDB type arrays to bytes so that we can use
742 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
743 // type servers is faster than iterating all object files compiled with /Z7 with
744 // CVTypeArray, which has high overheads due to the virtual interface of
745 // BinaryStream::readBytes.
746 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
747   BinaryStreamRef stream = types.getUnderlyingStream();
748   ArrayRef<uint8_t> debugTypes;
749   checkError(stream.readBytes(0, stream.getLength(), debugTypes));
750   return debugTypes;
751 }
752 
753 // Merge types from a type server PDB.
754 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
755   assert(config->debugGHashes && "ghashes must be enabled");
756 
757   // IPI merging depends on TPI, so do TPI first, then do IPI.  No need to
758   // propagate errors, those should've been handled during ghash loading.
759   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
760   pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
761   fillMapFromGHashes(g);
762   tpiMap = indexMapStorage;
763   mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
764   if (pdbFile.hasPDBIpiStream()) {
765     pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
766     ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
767     ipiSrc->fillMapFromGHashes(g);
768     ipiMap = ipiSrc->indexMapStorage;
769     ipiSrc->tpiMap = tpiMap;
770     ipiSrc->ipiMap = ipiMap;
771     ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
772 
773     if (config->showSummary) {
774       nbTypeRecords = ipiSrc->ghashes.size();
775       nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
776     }
777   }
778 
779   if (config->showSummary) {
780     nbTypeRecords += ghashes.size();
781     nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
782   }
783 }
784 
785 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
786   // No remapping to do with /Zi objects. Simply use the index map from the type
787   // server. Errors should have been reported earlier. Symbols from this object
788   // will be ignored.
789   Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
790   if (!maybeTsSrc) {
791     typeMergingError =
792         joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
793     return;
794   }
795   TypeServerSource *tsSrc = *maybeTsSrc;
796   tpiMap = tsSrc->tpiMap;
797   ipiMap = tsSrc->ipiMap;
798 }
799 
800 void PrecompSource::loadGHashes() {
801   if (getDebugH(file)) {
802     warn("ignoring .debug$H section; pch with ghash is not implemented");
803   }
804 
805   uint32_t ghashIdx = 0;
806   std::vector<GloballyHashedType> hashVec;
807   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
808     // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
809     // the PDB. There must be an entry in the list of ghashes so that the type
810     // indexes of the following records in the /Yc PCH object line up.
811     if (ty.kind() == LF_ENDPRECOMP)
812       endPrecompGHashIdx = ghashIdx;
813 
814     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
815     isItemIndex.push_back(isIdRecord(ty.kind()));
816     ++ghashIdx;
817   });
818   assignGHashesFromVector(std::move(hashVec));
819 }
820 
821 void UsePrecompSource::loadGHashes() {
822   PrecompSource *pchSrc = findPrecompSource(file, precompDependency);
823   if (!pchSrc)
824     return;
825 
826   // To compute ghashes of a /Yu object file, we need to build on the the
827   // ghashes of the /Yc PCH object. After we are done hashing, discard the
828   // ghashes from the PCH source so we don't unnecessarily try to deduplicate
829   // them.
830   std::vector<GloballyHashedType> hashVec =
831       pchSrc->ghashes.take_front(precompDependency.getTypesCount());
832   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
833     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
834     isItemIndex.push_back(isIdRecord(ty.kind()));
835   });
836   hashVec.erase(hashVec.begin(),
837                 hashVec.begin() + precompDependency.getTypesCount());
838   assignGHashesFromVector(std::move(hashVec));
839 }
840 
841 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
842   fillMapFromGHashes(g);
843   // This object was compiled with /Yu, so process the corresponding
844   // precompiled headers object (/Yc) first. Some type indices in the current
845   // object are referencing data in the precompiled headers object, so we need
846   // both to be loaded.
847   if (Error e = mergeInPrecompHeaderObj()) {
848     typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
849     return;
850   }
851 
852   tpiMap = indexMapStorage;
853   ipiMap = indexMapStorage;
854   mergeUniqueTypeRecords(file->debugTypes,
855                          TypeIndex(precompDependency.getStartTypeIndex() +
856                                    precompDependency.getTypesCount()));
857   if (config->showSummary) {
858     nbTypeRecords = ghashes.size();
859     nbTypeRecordsBytes = file->debugTypes.size();
860   }
861 }
862 
863 namespace {
864 /// A concurrent hash table for global type hashing. It is based on this paper:
865 /// Concurrent Hash Tables: Fast and General(?)!
866 /// https://dl.acm.org/doi/10.1145/3309206
867 ///
868 /// This hash table is meant to be used in two phases:
869 /// 1. concurrent insertions
870 /// 2. concurrent reads
871 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
872 ///
873 /// The paper describes storing a key-value pair in two machine words.
874 /// Generally, the values stored in this map are type indices, and we can use
875 /// those values to recover the ghash key from a side table. This allows us to
876 /// shrink the table entries further at the cost of some loads, and sidesteps
877 /// the need for a 128 bit atomic compare-and-swap operation.
878 ///
879 /// During insertion, a priority function is used to decide which insertion
880 /// should be preferred. This ensures that the output is deterministic. For
881 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
882 ///
883 class GHashCell;
884 struct GHashTable {
885   GHashCell *table = nullptr;
886   uint32_t tableSize = 0;
887 
888   GHashTable() = default;
889   ~GHashTable();
890 
891   /// Initialize the table with the given size. Because the table cannot be
892   /// resized, the initial size of the table must be large enough to contain all
893   /// inputs, or insertion may not be able to find an empty cell.
894   void init(uint32_t newTableSize);
895 
896   /// Insert the cell with the given ghash into the table. Return the insertion
897   /// position in the table. It is safe for the caller to store the insertion
898   /// position because the table cannot be resized.
899   uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
900                   GHashCell newCell);
901 };
902 
903 /// A ghash table cell for deduplicating types from TpiSources.
904 class GHashCell {
905   // Force "data" to be 64-bit aligned; otherwise, some versions of clang
906   // will generate calls to libatomic when using some versions of libstdc++
907   // on 32-bit targets.  (Also, in theory, there could be a target where
908   // new[] doesn't always return an 8-byte-aligned allocation.)
909   alignas(sizeof(uint64_t)) uint64_t data = 0;
910 
911 public:
912   GHashCell() = default;
913 
914   // Construct data most to least significant so that sorting works well:
915   // - isItem
916   // - tpiSrcIdx
917   // - ghashIdx
918   // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
919   // non-zero representation.
920   GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
921       : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
922              ghashIdx) {
923     assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
924     assert(ghashIdx == getGHashIdx() && "round trip failure");
925   }
926 
927   explicit GHashCell(uint64_t data) : data(data) {}
928 
929   // The empty cell is all zeros.
930   bool isEmpty() const { return data == 0ULL; }
931 
932   /// Extract the tpiSrcIdx.
933   uint32_t getTpiSrcIdx() const {
934     return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
935   }
936 
937   /// Extract the index into the ghash array of the TpiSource.
938   uint32_t getGHashIdx() const { return (uint32_t)data; }
939 
940   bool isItem() const { return data & (1ULL << 63U); }
941 
942   /// Get the ghash key for this cell.
943   GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
944     return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
945   }
946 
947   /// The priority function for the cell. The data is stored such that lower
948   /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
949   /// from earlier sources are more likely to prevail.
950   friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
951     return l.data < r.data;
952   }
953 };
954 } // namespace
955 
956 namespace lld {
957 namespace coff {
958 /// This type is just a wrapper around GHashTable with external linkage so it
959 /// can be used from a header.
960 struct GHashState {
961   GHashTable table;
962 };
963 } // namespace coff
964 } // namespace lld
965 
966 GHashTable::~GHashTable() { delete[] table; }
967 
968 void GHashTable::init(uint32_t newTableSize) {
969   table = new GHashCell[newTableSize];
970   memset(table, 0, newTableSize * sizeof(GHashCell));
971   tableSize = newTableSize;
972 }
973 
974 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
975                             GHashCell newCell) {
976   assert(!newCell.isEmpty() && "cannot insert empty cell value");
977 
978   // FIXME: The low bytes of SHA1 have low entropy for short records, which
979   // type records are. Swap the byte order for better entropy. A better ghash
980   // won't need this.
981   uint32_t startIdx =
982       ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
983 
984   // Do a linear probe starting at startIdx.
985   uint32_t idx = startIdx;
986   while (true) {
987     // Run a compare and swap loop. There are four cases:
988     // - cell is empty: CAS into place and return
989     // - cell has matching key, earlier priority: do nothing, return
990     // - cell has matching key, later priority: CAS into place and return
991     // - cell has non-matching key: hash collision, probe next cell
992     auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
993     GHashCell oldCell(cellPtr->load());
994     while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
995       // Check if there is an existing ghash entry with a higher priority
996       // (earlier ordering). If so, this is a duplicate, we are done.
997       if (!oldCell.isEmpty() && oldCell < newCell)
998         return idx;
999       // Either the cell is empty, or our value is higher priority. Try to
1000       // compare and swap. If it succeeds, we are done.
1001       if (cellPtr->compare_exchange_weak(oldCell, newCell))
1002         return idx;
1003       // If the CAS failed, check this cell again.
1004     }
1005 
1006     // Advance the probe. Wrap around to the beginning if we run off the end.
1007     ++idx;
1008     idx = idx == tableSize ? 0 : idx;
1009     if (idx == startIdx) {
1010       // If this becomes an issue, we could mark failure and rehash from the
1011       // beginning with a bigger table. There is no difference between rehashing
1012       // internally and starting over.
1013       report_fatal_error("ghash table is full");
1014     }
1015   }
1016   llvm_unreachable("left infloop");
1017 }
1018 
1019 TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1020     : typeTable(alloc), idTable(alloc), ctx(c) {}
1021 
1022 TypeMerger::~TypeMerger() = default;
1023 
1024 void TypeMerger::mergeTypesWithGHash() {
1025   // Load ghashes. Do type servers and PCH objects first.
1026   {
1027     ScopedTimer t1(ctx.loadGHashTimer);
1028     parallelForEach(dependencySources,
1029                     [&](TpiSource *source) { source->loadGHashes(); });
1030     parallelForEach(objectSources,
1031                     [&](TpiSource *source) { source->loadGHashes(); });
1032   }
1033 
1034   ScopedTimer t2(ctx.mergeGHashTimer);
1035   GHashState ghashState;
1036 
1037   // Estimate the size of hash table needed to deduplicate ghashes. This *must*
1038   // be larger than the number of unique types, or hash table insertion may not
1039   // be able to find a vacant slot. Summing the input types guarantees this, but
1040   // it is a gross overestimate. The table size could be reduced to save memory,
1041   // but it would require implementing rehashing, and this table is generally
1042   // small compared to total memory usage, at eight bytes per input type record,
1043   // and most input type records are larger than eight bytes.
1044   size_t tableSize = 0;
1045   for (TpiSource *source : ctx.tpiSourceList)
1046     tableSize += source->ghashes.size();
1047 
1048   // Cap the table size so that we can use 32-bit cell indices. Type indices are
1049   // also 32-bit, so this is an inherent PDB file format limit anyway.
1050   tableSize =
1051       std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1052   ghashState.table.init(static_cast<uint32_t>(tableSize));
1053 
1054   // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1055   // the contents of the hash table cell, but we can remember the insertion
1056   // position. Because the table does not rehash, the position will not change
1057   // under insertion. After insertion is done, the value of the cell can be read
1058   // to retrieve the final PDB type index.
1059   parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
1060     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1061     source->indexMapStorage.resize(source->ghashes.size());
1062     for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1063       if (source->shouldOmitFromPdb(i)) {
1064         source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1065         continue;
1066       }
1067       GloballyHashedType ghash = source->ghashes[i];
1068       bool isItem = source->isItemIndex.test(i);
1069       uint32_t cellIdx =
1070           ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
1071 
1072       // Store the ghash cell index as a type index in indexMapStorage. Later
1073       // we will replace it with the PDB type index.
1074       source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1075     }
1076   });
1077 
1078   // Collect all non-empty cells and sort them. This will implicitly assign
1079   // destination type indices, and partition the entries into type records and
1080   // item records. It arranges types in this order:
1081   // - type records
1082   //   - source 0, type 0...
1083   //   - source 1, type 1...
1084   // - item records
1085   //   - source 0, type 1...
1086   //   - source 1, type 0...
1087   std::vector<GHashCell> entries;
1088   for (const GHashCell &cell :
1089        makeArrayRef(ghashState.table.table, tableSize)) {
1090     if (!cell.isEmpty())
1091       entries.push_back(cell);
1092   }
1093   parallelSort(entries, std::less<GHashCell>());
1094   log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1095               tableSize ? double(entries.size()) / tableSize : 0,
1096               entries.size(), tableSize));
1097 
1098   // Find out how many type and item indices there are.
1099   auto mid =
1100       std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0));
1101   assert((mid == entries.end() || mid->isItem()) &&
1102          (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1103          "midpoint is not midpoint");
1104   uint32_t numTypes = std::distance(entries.begin(), mid);
1105   uint32_t numItems = std::distance(mid, entries.end());
1106   log("Tpi record count: " + Twine(numTypes));
1107   log("Ipi record count: " + Twine(numItems));
1108 
1109   // Make a list of the "unique" type records to merge for each tpi source. Type
1110   // merging will skip indices not on this list. Store the destination PDB type
1111   // index for these unique types in the tpiMap for each source. The entries for
1112   // non-unique types will be filled in prior to type merging.
1113   for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1114     auto &cell = entries[i];
1115     uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1116     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1117     source->uniqueTypes.push_back(cell.getGHashIdx());
1118 
1119     // Update the ghash table to store the destination PDB type index in the
1120     // table.
1121     uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1122     uint32_t ghashCellIndex =
1123         source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1124     ghashState.table.table[ghashCellIndex] =
1125         GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1126   }
1127 
1128   // In parallel, remap all types.
1129   for (TpiSource *source : dependencySources)
1130     source->remapTpiWithGHashes(&ghashState);
1131   parallelForEach(objectSources, [&](TpiSource *source) {
1132     source->remapTpiWithGHashes(&ghashState);
1133   });
1134 
1135   // Build a global map of from function ID to function type.
1136   for (TpiSource *source : ctx.tpiSourceList) {
1137     for (auto idToType : source->funcIdToType)
1138       funcIdToType.insert(idToType);
1139     source->funcIdToType.clear();
1140   }
1141 
1142   clearGHashes();
1143 }
1144 
1145 void TypeMerger::sortDependencies() {
1146   // Order dependencies first, but preserve the existing order.
1147   std::vector<TpiSource *> deps;
1148   std::vector<TpiSource *> objs;
1149   for (TpiSource *s : ctx.tpiSourceList)
1150     (s->isDependency() ? deps : objs).push_back(s);
1151   uint32_t numDeps = deps.size();
1152   uint32_t numObjs = objs.size();
1153   ctx.tpiSourceList = std::move(deps);
1154   ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1155   for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1156     ctx.tpiSourceList[i]->tpiSrcIdx = i;
1157   dependencySources = makeArrayRef(ctx.tpiSourceList.data(), numDeps);
1158   objectSources = makeArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1159 }
1160 
1161 /// Given the index into the ghash table for a particular type, return the type
1162 /// index for that type in the output PDB.
1163 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1164                                           uint32_t ghashCellIdx) {
1165   GHashCell cell = g->table.table[ghashCellIdx];
1166   return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1167 }
1168 
1169 /// Free heap allocated ghashes.
1170 void TypeMerger::clearGHashes() {
1171   for (TpiSource *src : ctx.tpiSourceList) {
1172     if (src->ownedGHashes)
1173       delete[] src->ghashes.data();
1174     src->ghashes = {};
1175     src->isItemIndex.clear();
1176     src->uniqueTypes.clear();
1177   }
1178 }
1179 
1180 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1181 // ghash to lookup its final type index in the PDB, and store that in the map.
1182 void TpiSource::fillMapFromGHashes(GHashState *g) {
1183   for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1184     TypeIndex fakeCellIndex = indexMapStorage[i];
1185     if (fakeCellIndex.isSimple())
1186       indexMapStorage[i] = fakeCellIndex;
1187     else
1188       indexMapStorage[i] =
1189           loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1190   }
1191 }
1192