10b57cec5SDimitry Andric //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric //
95ffd83dbSDimitry Andric // The data structures defined in this file are based on the reference
105ffd83dbSDimitry Andric // implementation which is available at
115ffd83dbSDimitry Andric // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp
125ffd83dbSDimitry Andric //
135ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
160b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/RecordName.h"
1781ad6265SDimitry Andric #include "llvm/DebugInfo/CodeView/RecordSerialization.h"
180b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
190b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
200b57cec5SDimitry Andric #include "llvm/DebugInfo/MSF/MSFBuilder.h"
210b57cec5SDimitry Andric #include "llvm/DebugInfo/MSF/MSFCommon.h"
220b57cec5SDimitry Andric #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
230b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
240b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/Hash.h"
2581ad6265SDimitry Andric #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
260b57cec5SDimitry Andric #include "llvm/Support/BinaryItemStream.h"
270b57cec5SDimitry Andric #include "llvm/Support/BinaryStreamWriter.h"
285ffd83dbSDimitry Andric #include "llvm/Support/Parallel.h"
295f757f3fSDimitry Andric #include "llvm/Support/TimeProfiler.h"
300b57cec5SDimitry Andric #include "llvm/Support/xxhash.h"
310b57cec5SDimitry Andric #include <algorithm>
320b57cec5SDimitry Andric #include <vector>
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric using namespace llvm;
350b57cec5SDimitry Andric using namespace llvm::msf;
360b57cec5SDimitry Andric using namespace llvm::pdb;
370b57cec5SDimitry Andric using namespace llvm::codeview;
380b57cec5SDimitry Andric 
395ffd83dbSDimitry Andric // Helper class for building the public and global PDB hash table buckets.
400b57cec5SDimitry Andric struct llvm::pdb::GSIHashStreamBuilder {
415ffd83dbSDimitry Andric   // Sum of the size of all public or global records.
425ffd83dbSDimitry Andric   uint32_t RecordByteSize = 0;
435ffd83dbSDimitry Andric 
445ffd83dbSDimitry Andric   std::vector<PSHashRecord> HashRecords;
455ffd83dbSDimitry Andric 
465ffd83dbSDimitry Andric   // The hash bitmap has `ceil((IPHR_HASH + 1) / 32)` words in it. The
475ffd83dbSDimitry Andric   // reference implementation builds a hash table with IPHR_HASH buckets in it.
485ffd83dbSDimitry Andric   // The last bucket is used to link together free hash table cells in a linked
495ffd83dbSDimitry Andric   // list, but it is always empty in the compressed, on-disk format. However,
505ffd83dbSDimitry Andric   // the bitmap must have a bit for it.
515ffd83dbSDimitry Andric   std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
525ffd83dbSDimitry Andric 
535ffd83dbSDimitry Andric   std::vector<support::ulittle32_t> HashBuckets;
545ffd83dbSDimitry Andric 
555ffd83dbSDimitry Andric   uint32_t calculateSerializedLength() const;
565ffd83dbSDimitry Andric   Error commit(BinaryStreamWriter &Writer);
575ffd83dbSDimitry Andric 
585ffd83dbSDimitry Andric   void finalizePublicBuckets();
595ffd83dbSDimitry Andric   void finalizeGlobalBuckets(uint32_t RecordZeroOffset);
605ffd83dbSDimitry Andric 
615ffd83dbSDimitry Andric   // Assign public and global symbol records into hash table buckets.
625ffd83dbSDimitry Andric   // Modifies the list of records to store the bucket index, but does not
635ffd83dbSDimitry Andric   // change the order.
645ffd83dbSDimitry Andric   void finalizeBuckets(uint32_t RecordZeroOffset,
655ffd83dbSDimitry Andric                        MutableArrayRef<BulkPublic> Globals);
665ffd83dbSDimitry Andric };
675ffd83dbSDimitry Andric 
685ffd83dbSDimitry Andric // DenseMapInfo implementation for deduplicating symbol records.
695ffd83dbSDimitry Andric struct llvm::pdb::SymbolDenseMapInfo {
getEmptyKeyllvm::pdb::SymbolDenseMapInfo700b57cec5SDimitry Andric   static inline CVSymbol getEmptyKey() {
710b57cec5SDimitry Andric     static CVSymbol Empty;
720b57cec5SDimitry Andric     return Empty;
730b57cec5SDimitry Andric   }
getTombstoneKeyllvm::pdb::SymbolDenseMapInfo740b57cec5SDimitry Andric   static inline CVSymbol getTombstoneKey() {
750b57cec5SDimitry Andric     static CVSymbol Tombstone(
760b57cec5SDimitry Andric         DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());
770b57cec5SDimitry Andric     return Tombstone;
780b57cec5SDimitry Andric   }
getHashValuellvm::pdb::SymbolDenseMapInfo790b57cec5SDimitry Andric   static unsigned getHashValue(const CVSymbol &Val) {
8006c3fb27SDimitry Andric     return xxh3_64bits(Val.RecordData);
810b57cec5SDimitry Andric   }
isEqualllvm::pdb::SymbolDenseMapInfo820b57cec5SDimitry Andric   static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
830b57cec5SDimitry Andric     return LHS.RecordData == RHS.RecordData;
840b57cec5SDimitry Andric   }
850b57cec5SDimitry Andric };
860b57cec5SDimitry Andric 
875ffd83dbSDimitry Andric namespace {
885ffd83dbSDimitry Andric LLVM_PACKED_START
895ffd83dbSDimitry Andric struct PublicSym32Layout {
905ffd83dbSDimitry Andric   RecordPrefix Prefix;
915ffd83dbSDimitry Andric   PublicSym32Header Pub;
925ffd83dbSDimitry Andric   // char Name[];
930b57cec5SDimitry Andric };
945ffd83dbSDimitry Andric LLVM_PACKED_END
955ffd83dbSDimitry Andric } // namespace
965ffd83dbSDimitry Andric 
975ffd83dbSDimitry Andric // Calculate how much memory this public needs when serialized.
sizeOfPublic(const BulkPublic & Pub)985ffd83dbSDimitry Andric static uint32_t sizeOfPublic(const BulkPublic &Pub) {
995ffd83dbSDimitry Andric   uint32_t NameLen = Pub.NameLen;
1005ffd83dbSDimitry Andric   NameLen = std::min(NameLen,
1015ffd83dbSDimitry Andric                      uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1));
1025ffd83dbSDimitry Andric   return alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);
1035ffd83dbSDimitry Andric }
1045ffd83dbSDimitry Andric 
serializePublic(uint8_t * Mem,const BulkPublic & Pub)1055ffd83dbSDimitry Andric static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) {
1065ffd83dbSDimitry Andric   // Assume the caller has allocated sizeOfPublic bytes.
1075ffd83dbSDimitry Andric   uint32_t NameLen = std::min(
1085ffd83dbSDimitry Andric       Pub.NameLen, uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1));
1095ffd83dbSDimitry Andric   size_t Size = alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);
1105ffd83dbSDimitry Andric   assert(Size == sizeOfPublic(Pub));
1115ffd83dbSDimitry Andric   auto *FixedMem = reinterpret_cast<PublicSym32Layout *>(Mem);
1125ffd83dbSDimitry Andric   FixedMem->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32);
1135ffd83dbSDimitry Andric   FixedMem->Prefix.RecordLen = static_cast<uint16_t>(Size - 2);
1145ffd83dbSDimitry Andric   FixedMem->Pub.Flags = Pub.Flags;
1155ffd83dbSDimitry Andric   FixedMem->Pub.Offset = Pub.Offset;
1165ffd83dbSDimitry Andric   FixedMem->Pub.Segment = Pub.Segment;
1175ffd83dbSDimitry Andric   char *NameMem = reinterpret_cast<char *>(FixedMem + 1);
1185ffd83dbSDimitry Andric   memcpy(NameMem, Pub.Name, NameLen);
1195ffd83dbSDimitry Andric   // Zero the null terminator and remaining bytes.
1205ffd83dbSDimitry Andric   memset(&NameMem[NameLen], 0, Size - sizeof(PublicSym32Layout) - NameLen);
121bdd1243dSDimitry Andric   return CVSymbol(ArrayRef(reinterpret_cast<uint8_t *>(Mem), Size));
1225ffd83dbSDimitry Andric }
1230b57cec5SDimitry Andric 
calculateSerializedLength() const1240b57cec5SDimitry Andric uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
1250b57cec5SDimitry Andric   uint32_t Size = sizeof(GSIHashHeader);
1260b57cec5SDimitry Andric   Size += HashRecords.size() * sizeof(PSHashRecord);
1270b57cec5SDimitry Andric   Size += HashBitmap.size() * sizeof(uint32_t);
1280b57cec5SDimitry Andric   Size += HashBuckets.size() * sizeof(uint32_t);
1290b57cec5SDimitry Andric   return Size;
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric 
commit(BinaryStreamWriter & Writer)1320b57cec5SDimitry Andric Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
1330b57cec5SDimitry Andric   GSIHashHeader Header;
1340b57cec5SDimitry Andric   Header.VerSignature = GSIHashHeader::HdrSignature;
1350b57cec5SDimitry Andric   Header.VerHdr = GSIHashHeader::HdrVersion;
1360b57cec5SDimitry Andric   Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
1370b57cec5SDimitry Andric   Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric   if (auto EC = Writer.writeObject(Header))
1400b57cec5SDimitry Andric     return EC;
1410b57cec5SDimitry Andric 
142bdd1243dSDimitry Andric   if (auto EC = Writer.writeArray(ArrayRef(HashRecords)))
1430b57cec5SDimitry Andric     return EC;
144bdd1243dSDimitry Andric   if (auto EC = Writer.writeArray(ArrayRef(HashBitmap)))
1450b57cec5SDimitry Andric     return EC;
146bdd1243dSDimitry Andric   if (auto EC = Writer.writeArray(ArrayRef(HashBuckets)))
1470b57cec5SDimitry Andric     return EC;
1480b57cec5SDimitry Andric   return Error::success();
1490b57cec5SDimitry Andric }
1500b57cec5SDimitry Andric 
isAsciiString(StringRef S)1510b57cec5SDimitry Andric static bool isAsciiString(StringRef S) {
1520b57cec5SDimitry Andric   return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
gsiRecordCmp(StringRef S1,StringRef S2)1565ffd83dbSDimitry Andric static int gsiRecordCmp(StringRef S1, StringRef S2) {
1570b57cec5SDimitry Andric   size_t LS = S1.size();
1580b57cec5SDimitry Andric   size_t RS = S2.size();
1590b57cec5SDimitry Andric   // Shorter strings always compare less than longer strings.
1600b57cec5SDimitry Andric   if (LS != RS)
161fe6060f1SDimitry Andric     return (LS > RS) - (LS < RS);
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric   // If either string contains non ascii characters, memcmp them.
1640b57cec5SDimitry Andric   if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
1655ffd83dbSDimitry Andric     return memcmp(S1.data(), S2.data(), LS);
1660b57cec5SDimitry Andric 
167e8d8bef9SDimitry Andric   // Both strings are ascii, perform a case-insensitive comparison.
168fe6060f1SDimitry Andric   return S1.compare_insensitive(S2.data());
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric 
finalizePublicBuckets()1715ffd83dbSDimitry Andric void GSIStreamBuilder::finalizePublicBuckets() {
1725ffd83dbSDimitry Andric   PSH->finalizeBuckets(0, Publics);
1735ffd83dbSDimitry Andric }
1745ffd83dbSDimitry Andric 
finalizeGlobalBuckets(uint32_t RecordZeroOffset)1755ffd83dbSDimitry Andric void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) {
1765ffd83dbSDimitry Andric   // Build up a list of globals to be bucketed. Use the BulkPublic data
1775ffd83dbSDimitry Andric   // structure for this purpose, even though these are global records, not
1785ffd83dbSDimitry Andric   // public records. Most of the same fields are required:
1795ffd83dbSDimitry Andric   // - Name
1805ffd83dbSDimitry Andric   // - NameLen
1815ffd83dbSDimitry Andric   // - SymOffset
1825ffd83dbSDimitry Andric   // - BucketIdx
1835ffd83dbSDimitry Andric   // The dead fields are Offset, Segment, and Flags.
1845ffd83dbSDimitry Andric   std::vector<BulkPublic> Records;
1855ffd83dbSDimitry Andric   Records.resize(Globals.size());
1860b57cec5SDimitry Andric   uint32_t SymOffset = RecordZeroOffset;
1875ffd83dbSDimitry Andric   for (size_t I = 0, E = Globals.size(); I < E; ++I) {
1885ffd83dbSDimitry Andric     StringRef Name = getSymbolName(Globals[I]);
1895ffd83dbSDimitry Andric     Records[I].Name = Name.data();
1905ffd83dbSDimitry Andric     Records[I].NameLen = Name.size();
1915ffd83dbSDimitry Andric     Records[I].SymOffset = SymOffset;
1925ffd83dbSDimitry Andric     SymOffset += Globals[I].length();
1930b57cec5SDimitry Andric   }
1940b57cec5SDimitry Andric 
1955ffd83dbSDimitry Andric   GSH->finalizeBuckets(RecordZeroOffset, Records);
1965ffd83dbSDimitry Andric }
1975ffd83dbSDimitry Andric 
finalizeBuckets(uint32_t RecordZeroOffset,MutableArrayRef<BulkPublic> Records)1985ffd83dbSDimitry Andric void GSIHashStreamBuilder::finalizeBuckets(
1995ffd83dbSDimitry Andric     uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) {
2005ffd83dbSDimitry Andric   // Hash every name in parallel.
20181ad6265SDimitry Andric   parallelFor(0, Records.size(), [&](size_t I) {
2025ffd83dbSDimitry Andric     Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH);
2035ffd83dbSDimitry Andric   });
2045ffd83dbSDimitry Andric 
2055ffd83dbSDimitry Andric   // Count up the size of each bucket. Then, use an exclusive prefix sum to
2065ffd83dbSDimitry Andric   // calculate the bucket start offsets. This is C++17 std::exclusive_scan, but
2075ffd83dbSDimitry Andric   // we can't use it yet.
2085ffd83dbSDimitry Andric   uint32_t BucketStarts[IPHR_HASH] = {0};
2095ffd83dbSDimitry Andric   for (const BulkPublic &P : Records)
2105ffd83dbSDimitry Andric     ++BucketStarts[P.BucketIdx];
2115ffd83dbSDimitry Andric   uint32_t Sum = 0;
2125ffd83dbSDimitry Andric   for (uint32_t &B : BucketStarts) {
2135ffd83dbSDimitry Andric     uint32_t Size = B;
2145ffd83dbSDimitry Andric     B = Sum;
2155ffd83dbSDimitry Andric     Sum += Size;
2165ffd83dbSDimitry Andric   }
2175ffd83dbSDimitry Andric 
2185ffd83dbSDimitry Andric   // Place globals into the hash table in bucket order. When placing a global,
2195ffd83dbSDimitry Andric   // update the bucket start. Every hash table slot should be filled. Always use
2205ffd83dbSDimitry Andric   // a refcount of one for now.
2215ffd83dbSDimitry Andric   HashRecords.resize(Records.size());
2225ffd83dbSDimitry Andric   uint32_t BucketCursors[IPHR_HASH];
2235ffd83dbSDimitry Andric   memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors));
2245ffd83dbSDimitry Andric   for (int I = 0, E = Records.size(); I < E; ++I) {
2255ffd83dbSDimitry Andric     uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++;
2265ffd83dbSDimitry Andric     HashRecords[HashIdx].Off = I;
2275ffd83dbSDimitry Andric     HashRecords[HashIdx].CRef = 1;
2285ffd83dbSDimitry Andric   }
2295ffd83dbSDimitry Andric 
2305ffd83dbSDimitry Andric   // Within the buckets, sort each bucket by memcmp of the symbol's name.  It's
2315ffd83dbSDimitry Andric   // important that we use the same sorting algorithm as is used by the
2325ffd83dbSDimitry Andric   // reference implementation to ensure that the search for a record within a
2335ffd83dbSDimitry Andric   // bucket can properly early-out when it detects the record won't be found.
2345ffd83dbSDimitry Andric   // The algorithm used here corresponds to the function
2355ffd83dbSDimitry Andric   // caseInsensitiveComparePchPchCchCch in the reference implementation.
23681ad6265SDimitry Andric   parallelFor(0, IPHR_HASH, [&](size_t I) {
2375ffd83dbSDimitry Andric     auto B = HashRecords.begin() + BucketStarts[I];
2385ffd83dbSDimitry Andric     auto E = HashRecords.begin() + BucketCursors[I];
2395ffd83dbSDimitry Andric     if (B == E)
2405ffd83dbSDimitry Andric       return;
2415ffd83dbSDimitry Andric     auto BucketCmp = [Records](const PSHashRecord &LHash,
2425ffd83dbSDimitry Andric                                const PSHashRecord &RHash) {
2435ffd83dbSDimitry Andric       const BulkPublic &L = Records[uint32_t(LHash.Off)];
2445ffd83dbSDimitry Andric       const BulkPublic &R = Records[uint32_t(RHash.Off)];
2455ffd83dbSDimitry Andric       assert(L.BucketIdx == R.BucketIdx);
2465ffd83dbSDimitry Andric       int Cmp = gsiRecordCmp(L.getName(), R.getName());
2475ffd83dbSDimitry Andric       if (Cmp != 0)
2485ffd83dbSDimitry Andric         return Cmp < 0;
2495ffd83dbSDimitry Andric       // This comparison is necessary to make the sorting stable in the presence
2505ffd83dbSDimitry Andric       // of two static globals with the same name. The easiest way to observe
2515ffd83dbSDimitry Andric       // this is with S_LDATA32 records.
2525ffd83dbSDimitry Andric       return L.SymOffset < R.SymOffset;
2535ffd83dbSDimitry Andric     };
2545ffd83dbSDimitry Andric     llvm::sort(B, E, BucketCmp);
2555ffd83dbSDimitry Andric 
2565ffd83dbSDimitry Andric     // After we are done sorting, replace the global indices with the stream
2575ffd83dbSDimitry Andric     // offsets of each global. Add one when writing symbol offsets to disk.
2585ffd83dbSDimitry Andric     // See GSI1::fixSymRecs.
2595ffd83dbSDimitry Andric     for (PSHashRecord &HRec : make_range(B, E))
2605ffd83dbSDimitry Andric       HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1;
2615ffd83dbSDimitry Andric   });
2625ffd83dbSDimitry Andric 
2635ffd83dbSDimitry Andric   // For each non-empty bucket, push the bucket start offset into HashBuckets
2645ffd83dbSDimitry Andric   // and set a bit in the hash bitmap.
2655ffd83dbSDimitry Andric   for (uint32_t I = 0; I < HashBitmap.size(); ++I) {
2665ffd83dbSDimitry Andric     uint32_t Word = 0;
2675ffd83dbSDimitry Andric     for (uint32_t J = 0; J < 32; ++J) {
2685ffd83dbSDimitry Andric       // Skip empty buckets.
2695ffd83dbSDimitry Andric       uint32_t BucketIdx = I * 32 + J;
2705ffd83dbSDimitry Andric       if (BucketIdx >= IPHR_HASH ||
2715ffd83dbSDimitry Andric           BucketStarts[BucketIdx] == BucketCursors[BucketIdx])
2720b57cec5SDimitry Andric         continue;
2735ffd83dbSDimitry Andric       Word |= (1U << J);
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric       // Calculate what the offset of the first hash record in the chain would
2760b57cec5SDimitry Andric       // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
2770b57cec5SDimitry Andric       // each record would be 12 bytes. See HROffsetCalc in gsi.h.
2780b57cec5SDimitry Andric       const int SizeOfHROffsetCalc = 12;
2790b57cec5SDimitry Andric       ulittle32_t ChainStartOff =
2805ffd83dbSDimitry Andric           ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc);
2810b57cec5SDimitry Andric       HashBuckets.push_back(ChainStartOff);
2825ffd83dbSDimitry Andric     }
2835ffd83dbSDimitry Andric     HashBitmap[I] = Word;
2840b57cec5SDimitry Andric   }
2850b57cec5SDimitry Andric }
2860b57cec5SDimitry Andric 
GSIStreamBuilder(msf::MSFBuilder & Msf)2870b57cec5SDimitry Andric GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
2888bcb0991SDimitry Andric     : Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()),
2898bcb0991SDimitry Andric       GSH(std::make_unique<GSIHashStreamBuilder>()) {}
2900b57cec5SDimitry Andric 
29181ad6265SDimitry Andric GSIStreamBuilder::~GSIStreamBuilder() = default;
2920b57cec5SDimitry Andric 
calculatePublicsHashStreamSize() const2930b57cec5SDimitry Andric uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
2940b57cec5SDimitry Andric   uint32_t Size = 0;
2950b57cec5SDimitry Andric   Size += sizeof(PublicsStreamHeader);
2960b57cec5SDimitry Andric   Size += PSH->calculateSerializedLength();
2975ffd83dbSDimitry Andric   Size += Publics.size() * sizeof(uint32_t); // AddrMap
2980b57cec5SDimitry Andric   // FIXME: Add thunk map and section offsets for incremental linking.
2990b57cec5SDimitry Andric 
3000b57cec5SDimitry Andric   return Size;
3010b57cec5SDimitry Andric }
3020b57cec5SDimitry Andric 
calculateGlobalsHashStreamSize() const3030b57cec5SDimitry Andric uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
3040b57cec5SDimitry Andric   return GSH->calculateSerializedLength();
3050b57cec5SDimitry Andric }
3060b57cec5SDimitry Andric 
finalizeMsfLayout()3070b57cec5SDimitry Andric Error GSIStreamBuilder::finalizeMsfLayout() {
3080b57cec5SDimitry Andric   // First we write public symbol records, then we write global symbol records.
3095ffd83dbSDimitry Andric   finalizePublicBuckets();
3105ffd83dbSDimitry Andric   finalizeGlobalBuckets(PSH->RecordByteSize);
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric   Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
3130b57cec5SDimitry Andric   if (!Idx)
3140b57cec5SDimitry Andric     return Idx.takeError();
3155ffd83dbSDimitry Andric   GlobalsStreamIndex = *Idx;
3165ffd83dbSDimitry Andric 
3170b57cec5SDimitry Andric   Idx = Msf.addStream(calculatePublicsHashStreamSize());
3180b57cec5SDimitry Andric   if (!Idx)
3190b57cec5SDimitry Andric     return Idx.takeError();
3205ffd83dbSDimitry Andric   PublicsStreamIndex = *Idx;
3210b57cec5SDimitry Andric 
3225ffd83dbSDimitry Andric   uint32_t RecordBytes = PSH->RecordByteSize + GSH->RecordByteSize;
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric   Idx = Msf.addStream(RecordBytes);
3250b57cec5SDimitry Andric   if (!Idx)
3260b57cec5SDimitry Andric     return Idx.takeError();
3275ffd83dbSDimitry Andric   RecordStreamIndex = *Idx;
3280b57cec5SDimitry Andric   return Error::success();
3290b57cec5SDimitry Andric }
3300b57cec5SDimitry Andric 
addPublicSymbols(std::vector<BulkPublic> && PublicsIn)3315ffd83dbSDimitry Andric void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) {
3325ffd83dbSDimitry Andric   assert(Publics.empty() && PSH->RecordByteSize == 0 &&
3335ffd83dbSDimitry Andric          "publics can only be added once");
3345ffd83dbSDimitry Andric   Publics = std::move(PublicsIn);
3350b57cec5SDimitry Andric 
3365ffd83dbSDimitry Andric   // Sort the symbols by name. PDBs contain lots of symbols, so use parallelism.
3375ffd83dbSDimitry Andric   parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) {
3385ffd83dbSDimitry Andric     return L.getName() < R.getName();
3395ffd83dbSDimitry Andric   });
3400b57cec5SDimitry Andric 
3415ffd83dbSDimitry Andric   // Assign offsets and calculate the length of the public symbol records.
3420b57cec5SDimitry Andric   uint32_t SymOffset = 0;
3435ffd83dbSDimitry Andric   for (BulkPublic &Pub : Publics) {
3445ffd83dbSDimitry Andric     Pub.SymOffset = SymOffset;
3455ffd83dbSDimitry Andric     SymOffset += sizeOfPublic(Pub);
3460b57cec5SDimitry Andric   }
3470b57cec5SDimitry Andric 
3485ffd83dbSDimitry Andric   // Remember the length of the public stream records.
3495ffd83dbSDimitry Andric   PSH->RecordByteSize = SymOffset;
3500b57cec5SDimitry Andric }
3510b57cec5SDimitry Andric 
addGlobalSymbol(const ProcRefSym & Sym)3520b57cec5SDimitry Andric void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
3535ffd83dbSDimitry Andric   serializeAndAddGlobal(Sym);
3540b57cec5SDimitry Andric }
3550b57cec5SDimitry Andric 
addGlobalSymbol(const DataSym & Sym)3560b57cec5SDimitry Andric void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
3575ffd83dbSDimitry Andric   serializeAndAddGlobal(Sym);
3580b57cec5SDimitry Andric }
3590b57cec5SDimitry Andric 
addGlobalSymbol(const ConstantSym & Sym)3600b57cec5SDimitry Andric void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
3615ffd83dbSDimitry Andric   serializeAndAddGlobal(Sym);
3620b57cec5SDimitry Andric }
3630b57cec5SDimitry Andric 
3645ffd83dbSDimitry Andric template <typename T>
serializeAndAddGlobal(const T & Symbol)3655ffd83dbSDimitry Andric void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) {
3665ffd83dbSDimitry Andric   T Copy(Symbol);
3675ffd83dbSDimitry Andric   addGlobalSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
3685ffd83dbSDimitry Andric                                                    CodeViewContainer::Pdb));
3695ffd83dbSDimitry Andric }
3705ffd83dbSDimitry Andric 
addGlobalSymbol(const codeview::CVSymbol & Symbol)3715ffd83dbSDimitry Andric void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) {
3725ffd83dbSDimitry Andric   // Ignore duplicate typedefs and constants.
3735ffd83dbSDimitry Andric   if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) {
3745ffd83dbSDimitry Andric     auto Iter = GlobalsSeen.insert(Symbol);
3755ffd83dbSDimitry Andric     if (!Iter.second)
3765ffd83dbSDimitry Andric       return;
3775ffd83dbSDimitry Andric   }
3785ffd83dbSDimitry Andric   GSH->RecordByteSize += Symbol.length();
3795ffd83dbSDimitry Andric   Globals.push_back(Symbol);
3805ffd83dbSDimitry Andric }
3815ffd83dbSDimitry Andric 
3825ffd83dbSDimitry Andric // Serialize each public and write it.
writePublics(BinaryStreamWriter & Writer,ArrayRef<BulkPublic> Publics)3835ffd83dbSDimitry Andric static Error writePublics(BinaryStreamWriter &Writer,
3845ffd83dbSDimitry Andric                           ArrayRef<BulkPublic> Publics) {
3855ffd83dbSDimitry Andric   std::vector<uint8_t> Storage;
3865ffd83dbSDimitry Andric   for (const BulkPublic &Pub : Publics) {
3875ffd83dbSDimitry Andric     Storage.resize(sizeOfPublic(Pub));
3885ffd83dbSDimitry Andric     serializePublic(Storage.data(), Pub);
3895ffd83dbSDimitry Andric     if (Error E = Writer.writeBytes(Storage))
3905ffd83dbSDimitry Andric       return E;
3915ffd83dbSDimitry Andric   }
3925ffd83dbSDimitry Andric   return Error::success();
3930b57cec5SDimitry Andric }
3940b57cec5SDimitry Andric 
writeRecords(BinaryStreamWriter & Writer,ArrayRef<CVSymbol> Records)3950b57cec5SDimitry Andric static Error writeRecords(BinaryStreamWriter &Writer,
3960b57cec5SDimitry Andric                           ArrayRef<CVSymbol> Records) {
3975f757f3fSDimitry Andric   BinaryItemStream<CVSymbol> ItemStream(llvm::endianness::little);
3980b57cec5SDimitry Andric   ItemStream.setItems(Records);
3990b57cec5SDimitry Andric   BinaryStreamRef RecordsRef(ItemStream);
4000b57cec5SDimitry Andric   return Writer.writeStreamRef(RecordsRef);
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric 
commitSymbolRecordStream(WritableBinaryStreamRef Stream)4030b57cec5SDimitry Andric Error GSIStreamBuilder::commitSymbolRecordStream(
4040b57cec5SDimitry Andric     WritableBinaryStreamRef Stream) {
4050b57cec5SDimitry Andric   BinaryStreamWriter Writer(Stream);
4060b57cec5SDimitry Andric 
4070b57cec5SDimitry Andric   // Write public symbol records first, followed by global symbol records.  This
4080b57cec5SDimitry Andric   // must match the order that we assume in finalizeMsfLayout when computing
4090b57cec5SDimitry Andric   // PSHZero and GSHZero.
4105ffd83dbSDimitry Andric   if (auto EC = writePublics(Writer, Publics))
4110b57cec5SDimitry Andric     return EC;
4125ffd83dbSDimitry Andric   if (auto EC = writeRecords(Writer, Globals))
4130b57cec5SDimitry Andric     return EC;
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric   return Error::success();
4160b57cec5SDimitry Andric }
4170b57cec5SDimitry Andric 
4185ffd83dbSDimitry Andric static std::vector<support::ulittle32_t>
computeAddrMap(ArrayRef<BulkPublic> Publics)4195ffd83dbSDimitry Andric computeAddrMap(ArrayRef<BulkPublic> Publics) {
4205ffd83dbSDimitry Andric   // Build a parallel vector of indices into the Publics vector, and sort it by
4215ffd83dbSDimitry Andric   // address.
4225ffd83dbSDimitry Andric   std::vector<ulittle32_t> PubAddrMap;
4235ffd83dbSDimitry Andric   PubAddrMap.reserve(Publics.size());
4245ffd83dbSDimitry Andric   for (int I = 0, E = Publics.size(); I < E; ++I)
4255ffd83dbSDimitry Andric     PubAddrMap.push_back(ulittle32_t(I));
4265ffd83dbSDimitry Andric 
4275ffd83dbSDimitry Andric   auto AddrCmp = [Publics](const ulittle32_t &LIdx, const ulittle32_t &RIdx) {
4285ffd83dbSDimitry Andric     const BulkPublic &L = Publics[LIdx];
4295ffd83dbSDimitry Andric     const BulkPublic &R = Publics[RIdx];
4305ffd83dbSDimitry Andric     if (L.Segment != R.Segment)
4315ffd83dbSDimitry Andric       return L.Segment < R.Segment;
4325ffd83dbSDimitry Andric     if (L.Offset != R.Offset)
4335ffd83dbSDimitry Andric       return L.Offset < R.Offset;
4345ffd83dbSDimitry Andric     // parallelSort is unstable, so we have to do name comparison to ensure
4355ffd83dbSDimitry Andric     // that two names for the same location come out in a deterministic order.
4365ffd83dbSDimitry Andric     return L.getName() < R.getName();
4375ffd83dbSDimitry Andric   };
4385ffd83dbSDimitry Andric   parallelSort(PubAddrMap, AddrCmp);
4395ffd83dbSDimitry Andric 
4405ffd83dbSDimitry Andric   // Rewrite the public symbol indices into symbol offsets.
4415ffd83dbSDimitry Andric   for (ulittle32_t &Entry : PubAddrMap)
4425ffd83dbSDimitry Andric     Entry = Publics[Entry].SymOffset;
4435ffd83dbSDimitry Andric   return PubAddrMap;
4445ffd83dbSDimitry Andric }
4455ffd83dbSDimitry Andric 
commitPublicsHashStream(WritableBinaryStreamRef Stream)4460b57cec5SDimitry Andric Error GSIStreamBuilder::commitPublicsHashStream(
4470b57cec5SDimitry Andric     WritableBinaryStreamRef Stream) {
4480b57cec5SDimitry Andric   BinaryStreamWriter Writer(Stream);
4490b57cec5SDimitry Andric   PublicsStreamHeader Header;
4500b57cec5SDimitry Andric 
4510b57cec5SDimitry Andric   // FIXME: Fill these in. They are for incremental linking.
4520b57cec5SDimitry Andric   Header.SymHash = PSH->calculateSerializedLength();
4535ffd83dbSDimitry Andric   Header.AddrMap = Publics.size() * 4;
4540b57cec5SDimitry Andric   Header.NumThunks = 0;
4550b57cec5SDimitry Andric   Header.SizeOfThunk = 0;
4560b57cec5SDimitry Andric   Header.ISectThunkTable = 0;
4570b57cec5SDimitry Andric   memset(Header.Padding, 0, sizeof(Header.Padding));
4580b57cec5SDimitry Andric   Header.OffThunkTable = 0;
4590b57cec5SDimitry Andric   Header.NumSections = 0;
4600b57cec5SDimitry Andric   if (auto EC = Writer.writeObject(Header))
4610b57cec5SDimitry Andric     return EC;
4620b57cec5SDimitry Andric 
4630b57cec5SDimitry Andric   if (auto EC = PSH->commit(Writer))
4640b57cec5SDimitry Andric     return EC;
4650b57cec5SDimitry Andric 
4665ffd83dbSDimitry Andric   std::vector<support::ulittle32_t> PubAddrMap = computeAddrMap(Publics);
4675ffd83dbSDimitry Andric   assert(PubAddrMap.size() == Publics.size());
468bdd1243dSDimitry Andric   if (auto EC = Writer.writeArray(ArrayRef(PubAddrMap)))
4690b57cec5SDimitry Andric     return EC;
4700b57cec5SDimitry Andric 
4710b57cec5SDimitry Andric   return Error::success();
4720b57cec5SDimitry Andric }
4730b57cec5SDimitry Andric 
commitGlobalsHashStream(WritableBinaryStreamRef Stream)4740b57cec5SDimitry Andric Error GSIStreamBuilder::commitGlobalsHashStream(
4750b57cec5SDimitry Andric     WritableBinaryStreamRef Stream) {
4760b57cec5SDimitry Andric   BinaryStreamWriter Writer(Stream);
4770b57cec5SDimitry Andric   return GSH->commit(Writer);
4780b57cec5SDimitry Andric }
4790b57cec5SDimitry Andric 
commit(const msf::MSFLayout & Layout,WritableBinaryStreamRef Buffer)4800b57cec5SDimitry Andric Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
4810b57cec5SDimitry Andric                                WritableBinaryStreamRef Buffer) {
4825f757f3fSDimitry Andric   llvm::TimeTraceScope timeScope("Commit GSI stream");
4830b57cec5SDimitry Andric   auto GS = WritableMappedBlockStream::createIndexedStream(
4840b57cec5SDimitry Andric       Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
4850b57cec5SDimitry Andric   auto PS = WritableMappedBlockStream::createIndexedStream(
4860b57cec5SDimitry Andric       Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
4870b57cec5SDimitry Andric   auto PRS = WritableMappedBlockStream::createIndexedStream(
4885ffd83dbSDimitry Andric       Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator());
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric   if (auto EC = commitSymbolRecordStream(*PRS))
4910b57cec5SDimitry Andric     return EC;
4920b57cec5SDimitry Andric   if (auto EC = commitGlobalsHashStream(*GS))
4930b57cec5SDimitry Andric     return EC;
4940b57cec5SDimitry Andric   if (auto EC = commitPublicsHashStream(*PS))
4950b57cec5SDimitry Andric     return EC;
4960b57cec5SDimitry Andric   return Error::success();
4970b57cec5SDimitry Andric }
498