1 //===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the GlobalModuleIndex class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Serialization/GlobalModuleIndex.h"
14 #include "ASTReaderInternals.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Lex/HeaderSearch.h"
17 #include "clang/Serialization/ASTBitCodes.h"
18 #include "clang/Serialization/ModuleFile.h"
19 #include "clang/Serialization/PCHContainerOperations.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/MapVector.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Bitstream/BitstreamReader.h"
25 #include "llvm/Bitstream/BitstreamWriter.h"
26 #include "llvm/Support/DJB.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/LockFileManager.h"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/OnDiskHashTable.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/TimeProfiler.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <cstdio>
35 using namespace clang;
36 using namespace serialization;
37 
38 //----------------------------------------------------------------------------//
39 // Shared constants
40 //----------------------------------------------------------------------------//
41 namespace {
42   enum {
43     /// The block containing the index.
44     GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
45   };
46 
47   /// Describes the record types in the index.
48   enum IndexRecordTypes {
49     /// Contains version information and potentially other metadata,
50     /// used to determine if we can read this global index file.
51     INDEX_METADATA,
52     /// Describes a module, including its file name and dependencies.
53     MODULE,
54     /// The index for identifiers.
55     IDENTIFIER_INDEX
56   };
57 }
58 
/// File name of the global index; it is appended to the directory path that
/// the reader and writer are given.
static constexpr const char *IndexFileName = "modules.idx";

/// Version number stored in the index metadata record. An index whose stored
/// version differs is not read.
static constexpr unsigned CurrentVersion = 1;
64 
65 //----------------------------------------------------------------------------//
66 // Global module index reader.
67 //----------------------------------------------------------------------------//
68 
69 namespace {
70 
71 /// Trait used to read the identifier index from the on-disk hash
72 /// table.
73 class IdentifierIndexReaderTrait {
74 public:
75   typedef StringRef external_key_type;
76   typedef StringRef internal_key_type;
77   typedef SmallVector<unsigned, 2> data_type;
78   typedef unsigned hash_value_type;
79   typedef unsigned offset_type;
80 
EqualKey(const internal_key_type & a,const internal_key_type & b)81   static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
82     return a == b;
83   }
84 
ComputeHash(const internal_key_type & a)85   static hash_value_type ComputeHash(const internal_key_type& a) {
86     return llvm::djbHash(a);
87   }
88 
89   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)90   ReadKeyDataLength(const unsigned char*& d) {
91     using namespace llvm::support;
92     unsigned KeyLen =
93         endian::readNext<uint16_t, llvm::endianness::little, unaligned>(d);
94     unsigned DataLen =
95         endian::readNext<uint16_t, llvm::endianness::little, unaligned>(d);
96     return std::make_pair(KeyLen, DataLen);
97   }
98 
99   static const internal_key_type&
GetInternalKey(const external_key_type & x)100   GetInternalKey(const external_key_type& x) { return x; }
101 
102   static const external_key_type&
GetExternalKey(const internal_key_type & x)103   GetExternalKey(const internal_key_type& x) { return x; }
104 
ReadKey(const unsigned char * d,unsigned n)105   static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
106     return StringRef((const char *)d, n);
107   }
108 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned DataLen)109   static data_type ReadData(const internal_key_type& k,
110                             const unsigned char* d,
111                             unsigned DataLen) {
112     using namespace llvm::support;
113 
114     data_type Result;
115     while (DataLen > 0) {
116       unsigned ID =
117           endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
118       Result.push_back(ID);
119       DataLen -= 4;
120     }
121 
122     return Result;
123   }
124 };
125 
126 typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
127     IdentifierIndexTable;
128 
129 }
130 
GlobalModuleIndex(std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,llvm::BitstreamCursor Cursor)131 GlobalModuleIndex::GlobalModuleIndex(
132     std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
133     llvm::BitstreamCursor Cursor)
134     : Buffer(std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
135       NumIdentifierLookupHits() {
136   auto Fail = [&](llvm::Error &&Err) {
137     report_fatal_error("Module index '" + Buffer->getBufferIdentifier() +
138                        "' failed: " + toString(std::move(Err)));
139   };
140 
141   llvm::TimeTraceScope TimeScope("Module LoadIndex");
142   // Read the global index.
143   bool InGlobalIndexBlock = false;
144   bool Done = false;
145   while (!Done) {
146     llvm::BitstreamEntry Entry;
147     if (Expected<llvm::BitstreamEntry> Res = Cursor.advance())
148       Entry = Res.get();
149     else
150       Fail(Res.takeError());
151 
152     switch (Entry.Kind) {
153     case llvm::BitstreamEntry::Error:
154       return;
155 
156     case llvm::BitstreamEntry::EndBlock:
157       if (InGlobalIndexBlock) {
158         InGlobalIndexBlock = false;
159         Done = true;
160         continue;
161       }
162       return;
163 
164 
165     case llvm::BitstreamEntry::Record:
166       // Entries in the global index block are handled below.
167       if (InGlobalIndexBlock)
168         break;
169 
170       return;
171 
172     case llvm::BitstreamEntry::SubBlock:
173       if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
174         if (llvm::Error Err = Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
175           Fail(std::move(Err));
176         InGlobalIndexBlock = true;
177       } else if (llvm::Error Err = Cursor.SkipBlock())
178         Fail(std::move(Err));
179       continue;
180     }
181 
182     SmallVector<uint64_t, 64> Record;
183     StringRef Blob;
184     Expected<unsigned> MaybeIndexRecord =
185         Cursor.readRecord(Entry.ID, Record, &Blob);
186     if (!MaybeIndexRecord)
187       Fail(MaybeIndexRecord.takeError());
188     IndexRecordTypes IndexRecord =
189         static_cast<IndexRecordTypes>(MaybeIndexRecord.get());
190     switch (IndexRecord) {
191     case INDEX_METADATA:
192       // Make sure that the version matches.
193       if (Record.size() < 1 || Record[0] != CurrentVersion)
194         return;
195       break;
196 
197     case MODULE: {
198       unsigned Idx = 0;
199       unsigned ID = Record[Idx++];
200 
201       // Make room for this module's information.
202       if (ID == Modules.size())
203         Modules.push_back(ModuleInfo());
204       else
205         Modules.resize(ID + 1);
206 
207       // Size/modification time for this module file at the time the
208       // global index was built.
209       Modules[ID].Size = Record[Idx++];
210       Modules[ID].ModTime = Record[Idx++];
211 
212       // File name.
213       unsigned NameLen = Record[Idx++];
214       Modules[ID].FileName.assign(Record.begin() + Idx,
215                                   Record.begin() + Idx + NameLen);
216       Idx += NameLen;
217 
218       // Dependencies
219       unsigned NumDeps = Record[Idx++];
220       Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
221                                       Record.begin() + Idx,
222                                       Record.begin() + Idx + NumDeps);
223       Idx += NumDeps;
224 
225       // Make sure we're at the end of the record.
226       assert(Idx == Record.size() && "More module info?");
227 
228       // Record this module as an unresolved module.
229       // FIXME: this doesn't work correctly for module names containing path
230       // separators.
231       StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName);
232       // Remove the -<hash of ModuleMapPath>
233       ModuleName = ModuleName.rsplit('-').first;
234       UnresolvedModules[ModuleName] = ID;
235       break;
236     }
237 
238     case IDENTIFIER_INDEX:
239       // Wire up the identifier index.
240       if (Record[0]) {
241         IdentifierIndex = IdentifierIndexTable::Create(
242             (const unsigned char *)Blob.data() + Record[0],
243             (const unsigned char *)Blob.data() + sizeof(uint32_t),
244             (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
245       }
246       break;
247     }
248   }
249 }
250 
~GlobalModuleIndex()251 GlobalModuleIndex::~GlobalModuleIndex() {
252   delete static_cast<IdentifierIndexTable *>(IdentifierIndex);
253 }
254 
255 std::pair<GlobalModuleIndex *, llvm::Error>
readIndex(StringRef Path)256 GlobalModuleIndex::readIndex(StringRef Path) {
257   // Load the index file, if it's there.
258   llvm::SmallString<128> IndexPath;
259   IndexPath += Path;
260   llvm::sys::path::append(IndexPath, IndexFileName);
261 
262   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
263       llvm::MemoryBuffer::getFile(IndexPath.c_str());
264   if (!BufferOrErr)
265     return std::make_pair(nullptr,
266                           llvm::errorCodeToError(BufferOrErr.getError()));
267   std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
268 
269   /// The main bitstream cursor for the main block.
270   llvm::BitstreamCursor Cursor(*Buffer);
271 
272   // Sniff for the signature.
273   for (unsigned char C : {'B', 'C', 'G', 'I'}) {
274     if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Cursor.Read(8)) {
275       if (Res.get() != C)
276         return std::make_pair(
277             nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
278                                              "expected signature BCGI"));
279     } else
280       return std::make_pair(nullptr, Res.takeError());
281   }
282 
283   return std::make_pair(new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
284                         llvm::Error::success());
285 }
286 
getModuleDependencies(ModuleFile * File,SmallVectorImpl<ModuleFile * > & Dependencies)287 void GlobalModuleIndex::getModuleDependencies(
288        ModuleFile *File,
289        SmallVectorImpl<ModuleFile *> &Dependencies) {
290   // Look for information about this module file.
291   llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
292     = ModulesByFile.find(File);
293   if (Known == ModulesByFile.end())
294     return;
295 
296   // Record dependencies.
297   Dependencies.clear();
298   ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
299   for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
300     if (ModuleFile *MF = Modules[I].File)
301       Dependencies.push_back(MF);
302   }
303 }
304 
lookupIdentifier(StringRef Name,HitSet & Hits)305 bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
306   Hits.clear();
307 
308   // If there's no identifier index, there is nothing we can do.
309   if (!IdentifierIndex)
310     return false;
311 
312   // Look into the identifier index.
313   ++NumIdentifierLookups;
314   IdentifierIndexTable &Table
315     = *static_cast<IdentifierIndexTable *>(IdentifierIndex);
316   IdentifierIndexTable::iterator Known = Table.find(Name);
317   if (Known == Table.end()) {
318     return false;
319   }
320 
321   SmallVector<unsigned, 2> ModuleIDs = *Known;
322   for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
323     if (ModuleFile *MF = Modules[ModuleIDs[I]].File)
324       Hits.insert(MF);
325   }
326 
327   ++NumIdentifierLookupHits;
328   return true;
329 }
330 
loadedModuleFile(ModuleFile * File)331 bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) {
332   // Look for the module in the global module index based on the module name.
333   StringRef Name = File->ModuleName;
334   llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
335   if (Known == UnresolvedModules.end()) {
336     return true;
337   }
338 
339   // Rectify this module with the global module index.
340   ModuleInfo &Info = Modules[Known->second];
341 
342   //  If the size and modification time match what we expected, record this
343   // module file.
344   bool Failed = true;
345   if (File->File.getSize() == Info.Size &&
346       File->File.getModificationTime() == Info.ModTime) {
347     Info.File = File;
348     ModulesByFile[File] = Known->second;
349 
350     Failed = false;
351   }
352 
353   // One way or another, we have resolved this module file.
354   UnresolvedModules.erase(Known);
355   return Failed;
356 }
357 
printStats()358 void GlobalModuleIndex::printStats() {
359   std::fprintf(stderr, "*** Global Module Index Statistics:\n");
360   if (NumIdentifierLookups) {
361     fprintf(stderr, "  %u / %u identifier lookups succeeded (%f%%)\n",
362             NumIdentifierLookupHits, NumIdentifierLookups,
363             (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
364   }
365   std::fprintf(stderr, "\n");
366 }
367 
dump()368 LLVM_DUMP_METHOD void GlobalModuleIndex::dump() {
369   llvm::errs() << "*** Global Module Index Dump:\n";
370   llvm::errs() << "Module files:\n";
371   for (auto &MI : Modules) {
372     llvm::errs() << "** " << MI.FileName << "\n";
373     if (MI.File)
374       MI.File->dump();
375     else
376       llvm::errs() << "\n";
377   }
378   llvm::errs() << "\n";
379 }
380 
381 //----------------------------------------------------------------------------//
382 // Global module index writer.
383 //----------------------------------------------------------------------------//
384 
385 namespace {
386   /// Provides information about a specific module file.
387   struct ModuleFileInfo {
388     /// The numberic ID for this module file.
389     unsigned ID;
390 
391     /// The set of modules on which this module depends. Each entry is
392     /// a module ID.
393     SmallVector<unsigned, 4> Dependencies;
394     ASTFileSignature Signature;
395   };
396 
397   struct ImportedModuleFileInfo {
398     off_t StoredSize;
399     time_t StoredModTime;
400     ASTFileSignature StoredSignature;
ImportedModuleFileInfo__anon5f0861fe0511::ImportedModuleFileInfo401     ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig)
402         : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
403   };
404 
405   /// Builder that generates the global module index file.
406   class GlobalModuleIndexBuilder {
407     FileManager &FileMgr;
408     const PCHContainerReader &PCHContainerRdr;
409 
410     /// Mapping from files to module file information.
411     using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
412 
413     /// Information about each of the known module files.
414     ModuleFilesMap ModuleFiles;
415 
416     /// Mapping from the imported module file to the imported
417     /// information.
418     using ImportedModuleFilesMap =
419         std::multimap<FileEntryRef, ImportedModuleFileInfo>;
420 
421     /// Information about each importing of a module file.
422     ImportedModuleFilesMap ImportedModuleFiles;
423 
424     /// Mapping from identifiers to the list of module file IDs that
425     /// consider this identifier to be interesting.
426     typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
427 
428     /// A mapping from all interesting identifiers to the set of module
429     /// files in which those identifiers are considered interesting.
430     InterestingIdentifierMap InterestingIdentifiers;
431 
432     /// Write the block-info block for the global module index file.
433     void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
434 
435     /// Retrieve the module file information for the given file.
getModuleFileInfo(FileEntryRef File)436     ModuleFileInfo &getModuleFileInfo(FileEntryRef File) {
437       auto Known = ModuleFiles.find(File);
438       if (Known != ModuleFiles.end())
439         return Known->second;
440 
441       unsigned NewID = ModuleFiles.size();
442       ModuleFileInfo &Info = ModuleFiles[File];
443       Info.ID = NewID;
444       return Info;
445     }
446 
447   public:
GlobalModuleIndexBuilder(FileManager & FileMgr,const PCHContainerReader & PCHContainerRdr)448     explicit GlobalModuleIndexBuilder(
449         FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr)
450         : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
451 
452     /// Load the contents of the given module file into the builder.
453     llvm::Error loadModuleFile(FileEntryRef File);
454 
455     /// Write the index to the given bitstream.
456     /// \returns true if an error occurred, false otherwise.
457     bool writeIndex(llvm::BitstreamWriter &Stream);
458   };
459 }
460 
emitBlockID(unsigned ID,const char * Name,llvm::BitstreamWriter & Stream,SmallVectorImpl<uint64_t> & Record)461 static void emitBlockID(unsigned ID, const char *Name,
462                         llvm::BitstreamWriter &Stream,
463                         SmallVectorImpl<uint64_t> &Record) {
464   Record.clear();
465   Record.push_back(ID);
466   Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
467 
468   // Emit the block name if present.
469   if (!Name || Name[0] == 0) return;
470   Record.clear();
471   while (*Name)
472     Record.push_back(*Name++);
473   Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
474 }
475 
emitRecordID(unsigned ID,const char * Name,llvm::BitstreamWriter & Stream,SmallVectorImpl<uint64_t> & Record)476 static void emitRecordID(unsigned ID, const char *Name,
477                          llvm::BitstreamWriter &Stream,
478                          SmallVectorImpl<uint64_t> &Record) {
479   Record.clear();
480   Record.push_back(ID);
481   while (*Name)
482     Record.push_back(*Name++);
483   Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
484 }
485 
486 void
emitBlockInfoBlock(llvm::BitstreamWriter & Stream)487 GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
488   SmallVector<uint64_t, 64> Record;
489   Stream.EnterBlockInfoBlock();
490 
491 #define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
492 #define RECORD(X) emitRecordID(X, #X, Stream, Record)
493   BLOCK(GLOBAL_INDEX_BLOCK);
494   RECORD(INDEX_METADATA);
495   RECORD(MODULE);
496   RECORD(IDENTIFIER_INDEX);
497 #undef RECORD
498 #undef BLOCK
499 
500   Stream.ExitBlock();
501 }
502 
503 namespace {
504   class InterestingASTIdentifierLookupTrait
505     : public serialization::reader::ASTIdentifierLookupTraitBase {
506 
507   public:
508     /// The identifier and whether it is "interesting".
509     typedef std::pair<StringRef, bool> data_type;
510 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned DataLen)511     data_type ReadData(const internal_key_type& k,
512                        const unsigned char* d,
513                        unsigned DataLen) {
514       // The first bit indicates whether this identifier is interesting.
515       // That's all we care about.
516       using namespace llvm::support;
517       unsigned RawID =
518           endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
519       bool IsInteresting = RawID & 0x01;
520       return std::make_pair(k, IsInteresting);
521     }
522   };
523 }
524 
loadModuleFile(FileEntryRef File)525 llvm::Error GlobalModuleIndexBuilder::loadModuleFile(FileEntryRef File) {
526   // Open the module file.
527 
528   auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true);
529   if (!Buffer)
530     return llvm::createStringError(Buffer.getError(),
531                                    "failed getting buffer for module file");
532 
533   // Initialize the input stream
534   llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer));
535 
536   // Sniff for the signature.
537   for (unsigned char C : {'C', 'P', 'C', 'H'})
538     if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = InStream.Read(8)) {
539       if (Res.get() != C)
540         return llvm::createStringError(std::errc::illegal_byte_sequence,
541                                        "expected signature CPCH");
542     } else
543       return Res.takeError();
544 
545   // Record this module file and assign it a unique ID (if it doesn't have
546   // one already).
547   unsigned ID = getModuleFileInfo(File).ID;
548 
549   // Search for the blocks and records we care about.
550   enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other;
551   bool Done = false;
552   while (!Done) {
553     Expected<llvm::BitstreamEntry> MaybeEntry = InStream.advance();
554     if (!MaybeEntry)
555       return MaybeEntry.takeError();
556     llvm::BitstreamEntry Entry = MaybeEntry.get();
557 
558     switch (Entry.Kind) {
559     case llvm::BitstreamEntry::Error:
560       Done = true;
561       continue;
562 
563     case llvm::BitstreamEntry::Record:
564       // In the 'other' state, just skip the record. We don't care.
565       if (State == Other) {
566         if (llvm::Expected<unsigned> Skipped = InStream.skipRecord(Entry.ID))
567           continue;
568         else
569           return Skipped.takeError();
570       }
571 
572       // Handle potentially-interesting records below.
573       break;
574 
575     case llvm::BitstreamEntry::SubBlock:
576       if (Entry.ID == CONTROL_BLOCK_ID) {
577         if (llvm::Error Err = InStream.EnterSubBlock(CONTROL_BLOCK_ID))
578           return Err;
579 
580         // Found the control block.
581         State = ControlBlock;
582         continue;
583       }
584 
585       if (Entry.ID == AST_BLOCK_ID) {
586         if (llvm::Error Err = InStream.EnterSubBlock(AST_BLOCK_ID))
587           return Err;
588 
589         // Found the AST block.
590         State = ASTBlock;
591         continue;
592       }
593 
594       if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) {
595         if (llvm::Error Err = InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID))
596           return Err;
597 
598         // Found the Diagnostic Options block.
599         State = DiagnosticOptionsBlock;
600         continue;
601       }
602 
603       if (llvm::Error Err = InStream.SkipBlock())
604         return Err;
605 
606       continue;
607 
608     case llvm::BitstreamEntry::EndBlock:
609       State = Other;
610       continue;
611     }
612 
613     // Read the given record.
614     SmallVector<uint64_t, 64> Record;
615     StringRef Blob;
616     Expected<unsigned> MaybeCode = InStream.readRecord(Entry.ID, Record, &Blob);
617     if (!MaybeCode)
618       return MaybeCode.takeError();
619     unsigned Code = MaybeCode.get();
620 
621     // Handle module dependencies.
622     if (State == ControlBlock && Code == IMPORTS) {
623       // Load each of the imported PCH files.
624       unsigned Idx = 0, N = Record.size();
625       while (Idx < N) {
626         // Read information about the AST file.
627 
628         // Skip the imported kind
629         ++Idx;
630 
631         // Skip if it is standard C++ module
632         ++Idx;
633 
634         // Skip the import location
635         ++Idx;
636 
637         // Load stored size/modification time.
638         off_t StoredSize = (off_t)Record[Idx++];
639         time_t StoredModTime = (time_t)Record[Idx++];
640 
641         // Skip the stored signature.
642         // FIXME: we could read the signature out of the import and validate it.
643         auto FirstSignatureByte = Record.begin() + Idx;
644         ASTFileSignature StoredSignature = ASTFileSignature::create(
645             FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size);
646         Idx += ASTFileSignature::size;
647 
648         // Skip the module name (currently this is only used for prebuilt
649         // modules while here we are only dealing with cached).
650         Idx += Record[Idx] + 1;
651 
652         // Retrieve the imported file name.
653         unsigned Length = Record[Idx++];
654         SmallString<128> ImportedFile(Record.begin() + Idx,
655                                       Record.begin() + Idx + Length);
656         Idx += Length;
657 
658         // Find the imported module file.
659         auto DependsOnFile =
660             FileMgr.getOptionalFileRef(ImportedFile, /*OpenFile=*/false,
661                                        /*CacheFailure=*/false);
662 
663         if (!DependsOnFile)
664           return llvm::createStringError(std::errc::bad_file_descriptor,
665                                          "imported file \"%s\" not found",
666                                          ImportedFile.c_str());
667 
668         // Save the information in ImportedModuleFileInfo so we can verify after
669         // loading all pcms.
670         ImportedModuleFiles.insert(std::make_pair(
671             *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
672                                                    StoredSignature)));
673 
674         // Record the dependency.
675         unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
676         getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
677       }
678 
679       continue;
680     }
681 
682     // Handle the identifier table
683     if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
684       typedef llvm::OnDiskIterableChainedHashTable<
685           InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
686       std::unique_ptr<InterestingIdentifierTable> Table(
687           InterestingIdentifierTable::Create(
688               (const unsigned char *)Blob.data() + Record[0],
689               (const unsigned char *)Blob.data() + sizeof(uint32_t),
690               (const unsigned char *)Blob.data()));
691       for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
692                                                      DEnd = Table->data_end();
693            D != DEnd; ++D) {
694         std::pair<StringRef, bool> Ident = *D;
695         if (Ident.second)
696           InterestingIdentifiers[Ident.first].push_back(ID);
697         else
698           (void)InterestingIdentifiers[Ident.first];
699       }
700     }
701 
702     // Get Signature.
703     if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
704       auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
705       assert(Signature != ASTFileSignature::createDummy() &&
706              "Dummy AST file signature not backpatched in ASTWriter.");
707       getModuleFileInfo(File).Signature = Signature;
708     }
709 
710     // We don't care about this record.
711   }
712 
713   return llvm::Error::success();
714 }
715 
716 namespace {
717 
718 /// Trait used to generate the identifier index as an on-disk hash
719 /// table.
720 class IdentifierIndexWriterTrait {
721 public:
722   typedef StringRef key_type;
723   typedef StringRef key_type_ref;
724   typedef SmallVector<unsigned, 2> data_type;
725   typedef const SmallVector<unsigned, 2> &data_type_ref;
726   typedef unsigned hash_value_type;
727   typedef unsigned offset_type;
728 
ComputeHash(key_type_ref Key)729   static hash_value_type ComputeHash(key_type_ref Key) {
730     return llvm::djbHash(Key);
731   }
732 
733   std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream & Out,key_type_ref Key,data_type_ref Data)734   EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
735     using namespace llvm::support;
736     endian::Writer LE(Out, llvm::endianness::little);
737     unsigned KeyLen = Key.size();
738     unsigned DataLen = Data.size() * 4;
739     LE.write<uint16_t>(KeyLen);
740     LE.write<uint16_t>(DataLen);
741     return std::make_pair(KeyLen, DataLen);
742   }
743 
EmitKey(raw_ostream & Out,key_type_ref Key,unsigned KeyLen)744   void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
745     Out.write(Key.data(), KeyLen);
746   }
747 
EmitData(raw_ostream & Out,key_type_ref Key,data_type_ref Data,unsigned DataLen)748   void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
749                 unsigned DataLen) {
750     using namespace llvm::support;
751     for (unsigned I = 0, N = Data.size(); I != N; ++I)
752       endian::write<uint32_t>(Out, Data[I], llvm::endianness::little);
753   }
754 };
755 
756 }
757 
writeIndex(llvm::BitstreamWriter & Stream)758 bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
759   for (auto MapEntry : ImportedModuleFiles) {
760     auto File = MapEntry.first;
761     ImportedModuleFileInfo &Info = MapEntry.second;
762     if (getModuleFileInfo(File).Signature) {
763       if (getModuleFileInfo(File).Signature != Info.StoredSignature)
764         // Verify Signature.
765         return true;
766     } else if (Info.StoredSize != File.getSize() ||
767                Info.StoredModTime != File.getModificationTime())
768       // Verify Size and ModTime.
769       return true;
770   }
771 
772   using namespace llvm;
773   llvm::TimeTraceScope TimeScope("Module WriteIndex");
774 
775   // Emit the file header.
776   Stream.Emit((unsigned)'B', 8);
777   Stream.Emit((unsigned)'C', 8);
778   Stream.Emit((unsigned)'G', 8);
779   Stream.Emit((unsigned)'I', 8);
780 
781   // Write the block-info block, which describes the records in this bitcode
782   // file.
783   emitBlockInfoBlock(Stream);
784 
785   Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
786 
787   // Write the metadata.
788   SmallVector<uint64_t, 2> Record;
789   Record.push_back(CurrentVersion);
790   Stream.EmitRecord(INDEX_METADATA, Record);
791 
792   // Write the set of known module files.
793   for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
794                                 MEnd = ModuleFiles.end();
795        M != MEnd; ++M) {
796     Record.clear();
797     Record.push_back(M->second.ID);
798     Record.push_back(M->first.getSize());
799     Record.push_back(M->first.getModificationTime());
800 
801     // File name
802     StringRef Name(M->first.getName());
803     Record.push_back(Name.size());
804     Record.append(Name.begin(), Name.end());
805 
806     // Dependencies
807     Record.push_back(M->second.Dependencies.size());
808     Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
809     Stream.EmitRecord(MODULE, Record);
810   }
811 
812   // Write the identifier -> module file mapping.
813   {
814     llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
815     IdentifierIndexWriterTrait Trait;
816 
817     // Populate the hash table.
818     for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
819                                             IEnd = InterestingIdentifiers.end();
820          I != IEnd; ++I) {
821       Generator.insert(I->first(), I->second, Trait);
822     }
823 
824     // Create the on-disk hash table in a buffer.
825     SmallString<4096> IdentifierTable;
826     uint32_t BucketOffset;
827     {
828       using namespace llvm::support;
829       llvm::raw_svector_ostream Out(IdentifierTable);
830       // Make sure that no bucket is at offset 0
831       endian::write<uint32_t>(Out, 0, llvm::endianness::little);
832       BucketOffset = Generator.Emit(Out, Trait);
833     }
834 
835     // Create a blob abbreviation
836     auto Abbrev = std::make_shared<BitCodeAbbrev>();
837     Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
838     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
839     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
840     unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
841 
842     // Write the identifier table
843     uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset};
844     Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
845   }
846 
847   Stream.ExitBlock();
848   return false;
849 }
850 
851 llvm::Error
writeIndex(FileManager & FileMgr,const PCHContainerReader & PCHContainerRdr,StringRef Path)852 GlobalModuleIndex::writeIndex(FileManager &FileMgr,
853                               const PCHContainerReader &PCHContainerRdr,
854                               StringRef Path) {
855   llvm::SmallString<128> IndexPath;
856   IndexPath += Path;
857   llvm::sys::path::append(IndexPath, IndexFileName);
858 
859   // Coordinate building the global index file with other processes that might
860   // try to do the same.
861   llvm::LockFileManager Locked(IndexPath);
862   switch (Locked) {
863   case llvm::LockFileManager::LFS_Error:
864     return llvm::createStringError(std::errc::io_error, "LFS error");
865 
866   case llvm::LockFileManager::LFS_Owned:
867     // We're responsible for building the index ourselves. Do so below.
868     break;
869 
870   case llvm::LockFileManager::LFS_Shared:
871     // Someone else is responsible for building the index. We don't care
872     // when they finish, so we're done.
873     return llvm::createStringError(std::errc::device_or_resource_busy,
874                                    "someone else is building the index");
875   }
876 
877   // The module index builder.
878   GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
879 
880   // Load each of the module files.
881   std::error_code EC;
882   for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
883        D != DEnd && !EC;
884        D.increment(EC)) {
885     // If this isn't a module file, we don't care.
886     if (llvm::sys::path::extension(D->path()) != ".pcm") {
887       // ... unless it's a .pcm.lock file, which indicates that someone is
888       // in the process of rebuilding a module. They'll rebuild the index
889       // at the end of that translation unit, so we don't have to.
890       if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
891         return llvm::createStringError(std::errc::device_or_resource_busy,
892                                        "someone else is building the index");
893 
894       continue;
895     }
896 
897     // If we can't find the module file, skip it.
898     auto ModuleFile = FileMgr.getOptionalFileRef(D->path());
899     if (!ModuleFile)
900       continue;
901 
902     // Load this module file.
903     if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile))
904       return Err;
905   }
906 
907   // The output buffer, into which the global index will be written.
908   SmallString<16> OutputBuffer;
909   {
910     llvm::BitstreamWriter OutputStream(OutputBuffer);
911     if (Builder.writeIndex(OutputStream))
912       return llvm::createStringError(std::errc::io_error,
913                                      "failed writing index");
914   }
915 
916   return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
917     OS << OutputBuffer;
918     return llvm::Error::success();
919   });
920 }
921 
922 namespace {
923   class GlobalIndexIdentifierIterator : public IdentifierIterator {
924     /// The current position within the identifier lookup table.
925     IdentifierIndexTable::key_iterator Current;
926 
927     /// The end position within the identifier lookup table.
928     IdentifierIndexTable::key_iterator End;
929 
930   public:
GlobalIndexIdentifierIterator(IdentifierIndexTable & Idx)931     explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
932       Current = Idx.key_begin();
933       End = Idx.key_end();
934     }
935 
Next()936     StringRef Next() override {
937       if (Current == End)
938         return StringRef();
939 
940       StringRef Result = *Current;
941       ++Current;
942       return Result;
943     }
944   };
945 }
946 
createIdentifierIterator() const947 IdentifierIterator *GlobalModuleIndex::createIdentifierIterator() const {
948   IdentifierIndexTable &Table =
949     *static_cast<IdentifierIndexTable *>(IdentifierIndex);
950   return new GlobalIndexIdentifierIterator(Table);
951 }
952