1 //===- ModuleManager.cpp - Module Manager ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the ModuleManager class, which manages a set of loaded
10 //  modules for the ASTReader.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Serialization/ModuleManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/LLVM.h"
17 #include "clang/Lex/HeaderSearch.h"
18 #include "clang/Lex/ModuleMap.h"
19 #include "clang/Serialization/GlobalModuleIndex.h"
20 #include "clang/Serialization/InMemoryModuleCache.h"
21 #include "clang/Serialization/ModuleFile.h"
22 #include "clang/Serialization/PCHContainerOperations.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/iterator.h"
29 #include "llvm/Support/Chrono.h"
30 #include "llvm/Support/DOTGraphTraits.h"
31 #include "llvm/Support/ErrorOr.h"
32 #include "llvm/Support/GraphWriter.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/VirtualFileSystem.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <memory>
38 #include <string>
39 #include <system_error>
40 
41 using namespace clang;
42 using namespace serialization;
43 
44 ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const {
45   auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false,
46                                /*CacheFailure=*/false);
47   if (Entry)
48     return lookup(*Entry);
49 
50   return nullptr;
51 }
52 
53 ModuleFile *ModuleManager::lookupByModuleName(StringRef Name) const {
54   if (const Module *Mod = HeaderSearchInfo.getModuleMap().findModule(Name))
55     if (const FileEntry *File = Mod->getASTFile())
56       return lookup(File);
57 
58   return nullptr;
59 }
60 
61 ModuleFile *ModuleManager::lookup(const FileEntry *File) const {
62   auto Known = Modules.find(File);
63   if (Known == Modules.end())
64     return nullptr;
65 
66   return Known->second;
67 }
68 
69 std::unique_ptr<llvm::MemoryBuffer>
70 ModuleManager::lookupBuffer(StringRef Name) {
71   auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false,
72                                /*CacheFailure=*/false);
73   if (!Entry)
74     return nullptr;
75   return std::move(InMemoryBuffers[*Entry]);
76 }
77 
78 static bool checkSignature(ASTFileSignature Signature,
79                            ASTFileSignature ExpectedSignature,
80                            std::string &ErrorStr) {
81   if (!ExpectedSignature || Signature == ExpectedSignature)
82     return false;
83 
84   ErrorStr =
85       Signature ? "signature mismatch" : "could not read module signature";
86   return true;
87 }
88 
89 static void updateModuleImports(ModuleFile &MF, ModuleFile *ImportedBy,
90                                 SourceLocation ImportLoc) {
91   if (ImportedBy) {
92     MF.ImportedBy.insert(ImportedBy);
93     ImportedBy->Imports.insert(&MF);
94   } else {
95     if (!MF.DirectlyImported)
96       MF.ImportLoc = ImportLoc;
97 
98     MF.DirectlyImported = true;
99   }
100 }
101 
// Attempts to load the module file named FileName and register it with this
// manager.
//
// Module is reset to null on entry and is only set when the function returns
// AlreadyLoaded or NewlyLoaded. Outcomes visible in this function:
//   - OutOfDate:     lookupModuleFile() reported a mismatch, a signature check
//                    failed, or the module cache reports shouldBuildPCM().
//   - Missing:       no file entry was found (and FileName is not "-"), or the
//                    buffer for the file could not be obtained.
//   - AlreadyLoaded: a module file is already registered for the entry; only
//                    the import edges/flags are updated.
//   - NewlyLoaded:   a new ModuleFile was created, appended to Chain, and
//                    recorded in Modules (plus PCHChain / Roots as applicable).
// ErrorStr receives a message on most failure paths (see the note on the
// shouldBuildPCM() branch below for the exception).
102 ModuleManager::AddModuleResult
103 ModuleManager::addModule(StringRef FileName, ModuleKind Type,
104                          SourceLocation ImportLoc, ModuleFile *ImportedBy,
105                          unsigned Generation,
106                          off_t ExpectedSize, time_t ExpectedModTime,
107                          ASTFileSignature ExpectedSignature,
108                          ASTFileSignatureReader ReadSignature,
109                          ModuleFile *&Module,
110                          std::string &ErrorStr) {
111   Module = nullptr;
112 
113   // Look for the file entry. This only fails if the expected size or
114   // modification time differ.
115   OptionalFileEntryRefDegradesToFileEntryPtr Entry;
116   if (Type == MK_ExplicitModule || Type == MK_PrebuiltModule) {
117     // If we're not expecting to pull this file out of the module cache, it
118     // might have a different mtime due to being moved across filesystems in
119     // a distributed build. The size must still match, though. (As must the
120     // contents, but we can't check that.)
121     ExpectedModTime = 0;
122   }
123   // Note: ExpectedSize and ExpectedModTime will be 0 for MK_ImplicitModule
124   // when using an ASTFileSignature.
125   if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, Entry)) {
126     ErrorStr = "module file out of date";
127     return OutOfDate;
128   }
129 
      // An empty Entry is tolerated only for "-", which is read from standard
      // input further down.
130   if (!Entry && FileName != "-") {
131     ErrorStr = "module file not found";
132     return Missing;
133   }
134 
135   // The ModuleManager's use of FileEntry nodes as the keys for its map of
136   // loaded modules is less than ideal. Uniqueness for FileEntry nodes is
137   // maintained by FileManager, which in turn uses inode numbers on hosts
138   // that support that. When coupled with the module cache's proclivity for
139   // turning over and deleting stale PCMs, this means entries for different
140   // module files can wind up reusing the same underlying inode. When this
141   // happens, subsequent accesses to the Modules map will disagree on the
142   // ModuleFile associated with a given file. In general, it is not sufficient
143   // to resolve this conundrum with a type like FileEntryRef that stores the
144   // name of the FileEntry node on first access because of path canonicalization
145   // issues. However, the paths constructed for implicit module builds are
146   // fully under Clang's control. We *can*, therefore, rely on their structure
147   // being consistent across operating systems and across subsequent accesses
148   // to the Modules map.
149   auto implicitModuleNamesMatch = [](ModuleKind Kind, const ModuleFile *MF,
150                                      const FileEntry *Entry) -> bool {
151     if (Kind != MK_ImplicitModule)
152       return true;
153     return Entry->getName() == MF->FileName;
154   };
155 
156   // Check whether we have already loaded this module.
157   if (ModuleFile *ModuleEntry = Modules.lookup(Entry)) {
158     if (implicitModuleNamesMatch(Type, ModuleEntry, Entry)) {
159       // Check the stored signature.
160       if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
161         return OutOfDate;
162 
163       Module = ModuleEntry;
164       updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
165       return AlreadyLoaded;
166     }
        // Otherwise an implicit module with a different file name is registered
        // under the same entry; fall through and load this file as a new module.
167   }
168 
169   // Allocate a new module.
170   auto NewModule = std::make_unique<ModuleFile>(Type, Generation);
171   NewModule->Index = Chain.size();
172   NewModule->FileName = FileName.str();
173   NewModule->File = Entry;
174   NewModule->ImportLoc = ImportLoc;
175   NewModule->InputFilesValidationTimestamp = 0;
176 
      // For implicit modules, use the timestamp file's modification time as the
      // input-validation timestamp; it stays 0 if the stat call fails.
177   if (NewModule->Kind == MK_ImplicitModule) {
178     std::string TimestampFilename = NewModule->getTimestampFilename();
179     llvm::vfs::Status Status;
180     // A cached stat value would be fine as well.
181     if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
182       NewModule->InputFilesValidationTimestamp =
183           llvm::sys::toTimeT(Status.getLastModificationTime());
184   }
185 
186   // Load the contents of the module
      // NOTE(review): the three Entry->closeFile() calls below are not guarded,
      // yet Entry may be empty when FileName is "-". Presumably none of these
      // branches is taken for "-" -- confirm.
187   if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) {
188     // The buffer was already provided for us.
189     NewModule->Buffer = &ModuleCache->addBuiltPCM(FileName, std::move(Buffer));
190     // Since the cached buffer is reused, it is safe to close the file
191     // descriptor that was opened while stat()ing the PCM in
192     // lookupModuleFile() above, it won't be needed any longer.
193     Entry->closeFile();
194   } else if (llvm::MemoryBuffer *Buffer =
195                  getModuleCache().lookupPCM(FileName)) {
196     NewModule->Buffer = Buffer;
197     // As above, the file descriptor is no longer needed.
198     Entry->closeFile();
199   } else if (getModuleCache().shouldBuildPCM(FileName)) {
200     // Report that the module is out of date, since we tried (and failed) to
201     // import it earlier.
        // Note that ErrorStr is not assigned on this path.
202     Entry->closeFile();
203     return OutOfDate;
204   } else {
205     // Open the AST file.
206     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buf((std::error_code()));
207     if (FileName == "-") {
208       Buf = llvm::MemoryBuffer::getSTDIN();
209     } else {
210       // Get a buffer of the file and close the file descriptor when done.
211       // The file is volatile because in a parallel build we expect multiple
212       // compiler processes to use the same module file rebuilding it if needed.
213       //
214       // RequiresNullTerminator is false because module files don't need it, and
215       // this allows the file to still be mmapped.
216       Buf = FileMgr.getBufferForFile(NewModule->File,
217                                      /*IsVolatile=*/true,
218                                      /*RequiresNullTerminator=*/false);
219     }
220 
221     if (!Buf) {
222       ErrorStr = Buf.getError().message();
223       return Missing;
224     }
225 
226     NewModule->Buffer = &getModuleCache().addPCM(FileName, std::move(*Buf));
227   }
228 
229   // Initialize the stream.
230   NewModule->Data = PCHContainerRdr.ExtractPCH(*NewModule->Buffer);
231 
232   // Read the signature eagerly now so that we can check it.  Avoid calling
233   // ReadSignature unless there's something to check though.
234   if (ExpectedSignature && checkSignature(ReadSignature(NewModule->Data),
235                                           ExpectedSignature, ErrorStr))
236     return OutOfDate;
237 
238   // We're keeping this module.  Store it everywhere.
      // This assignment replaces any module previously registered under Entry.
239   Module = Modules[Entry] = NewModule.get();
240 
241   updateModuleImports(*NewModule, ImportedBy, ImportLoc);
242 
      // Non-module files go on the PCH chain; files with no importer are roots.
243   if (!NewModule->isModule())
244     PCHChain.push_back(NewModule.get());
245   if (!ImportedBy)
246     Roots.push_back(NewModule.get());
247 
      // Chain takes ownership; Module and the lists above keep raw pointers.
248   Chain.push_back(std::move(NewModule));
249   return NewlyLoaded;
250 }
251 
252 void ModuleManager::removeModules(ModuleIterator First) {
253   auto Last = end();
254   if (First == Last)
255     return;
256 
257   // Explicitly clear VisitOrder since we might not notice it is stale.
258   VisitOrder.clear();
259 
260   // Collect the set of module file pointers that we'll be removing.
261   llvm::SmallPtrSet<ModuleFile *, 4> victimSet(
262       (llvm::pointer_iterator<ModuleIterator>(First)),
263       (llvm::pointer_iterator<ModuleIterator>(Last)));
264 
265   auto IsVictim = [&](ModuleFile *MF) {
266     return victimSet.count(MF);
267   };
268   // Remove any references to the now-destroyed modules.
269   for (auto I = begin(); I != First; ++I) {
270     I->Imports.remove_if(IsVictim);
271     I->ImportedBy.remove_if(IsVictim);
272   }
273   llvm::erase_if(Roots, IsVictim);
274 
275   // Remove the modules from the PCH chain.
276   for (auto I = First; I != Last; ++I) {
277     if (!I->isModule()) {
278       PCHChain.erase(llvm::find(PCHChain, &*I), PCHChain.end());
279       break;
280     }
281   }
282 
283   // Delete the modules.
284   for (ModuleIterator victim = First; victim != Last; ++victim)
285     Modules.erase(victim->File);
286 
287   Chain.erase(Chain.begin() + (First - begin()), Chain.end());
288 }
289 
290 void
291 ModuleManager::addInMemoryBuffer(StringRef FileName,
292                                  std::unique_ptr<llvm::MemoryBuffer> Buffer) {
293   const FileEntry *Entry =
294       FileMgr.getVirtualFile(FileName, Buffer->getBufferSize(), 0);
295   InMemoryBuffers[Entry] = std::move(Buffer);
296 }
297 
298 std::unique_ptr<ModuleManager::VisitState> ModuleManager::allocateVisitState() {
299   // Fast path: if we have a cached state, use it.
300   if (FirstVisitState) {
301     auto Result = std::move(FirstVisitState);
302     FirstVisitState = std::move(Result->NextState);
303     return Result;
304   }
305 
306   // Allocate and return a new state.
307   return std::make_unique<VisitState>(size());
308 }
309 
310 void ModuleManager::returnVisitState(std::unique_ptr<VisitState> State) {
311   assert(State->NextState == nullptr && "Visited state is in list?");
312   State->NextState = std::move(FirstVisitState);
313   FirstVisitState = std::move(State);
314 }
315 
316 void ModuleManager::setGlobalIndex(GlobalModuleIndex *Index) {
317   GlobalIndex = Index;
318   if (!GlobalIndex) {
319     ModulesInCommonWithGlobalIndex.clear();
320     return;
321   }
322 
323   // Notify the global module index about all of the modules we've already
324   // loaded.
325   for (ModuleFile &M : *this)
326     if (!GlobalIndex->loadedModuleFile(&M))
327       ModulesInCommonWithGlobalIndex.push_back(&M);
328 }
329 
330 void ModuleManager::moduleFileAccepted(ModuleFile *MF) {
331   if (!GlobalIndex || GlobalIndex->loadedModuleFile(MF))
332     return;
333 
334   ModulesInCommonWithGlobalIndex.push_back(MF);
335 }
336 
// Constructs a module manager from its collaborators. The constructor body is
// empty: it only initialises the four members named in the initialiser list.
// Note that the ModuleCache member is initialised with the *address* of the
// ModuleCache argument, whereas the other three are initialised directly from
// their arguments.
337 ModuleManager::ModuleManager(FileManager &FileMgr,
338                              InMemoryModuleCache &ModuleCache,
339                              const PCHContainerReader &PCHContainerRdr,
340                              const HeaderSearch &HeaderSearchInfo)
341     : FileMgr(FileMgr), ModuleCache(&ModuleCache),
342       PCHContainerRdr(PCHContainerRdr), HeaderSearchInfo(HeaderSearchInfo) {}
343 
// Visits the loaded module files so that a module is visited before any of the
// modules it imports.
//
// Visitor is called once per visited module. When it returns true, every
// module reachable from that module through Imports is marked as visited and
// will therefore be skipped; when it returns false, the traversal simply moves
// on to the next module in the order.
//
// ModuleFilesHit is optional. When it is non-null and
// ModulesInCommonWithGlobalIndex is non-empty, each module in that list that
// is absent from *ModuleFilesHit is pre-marked as visited, so Visitor is never
// called for it.
//
// The visitation order is cached in VisitOrder and recomputed only when its
// size no longer matches the number of modules in Chain.
344 void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
345                           llvm::SmallPtrSetImpl<ModuleFile *> *ModuleFilesHit) {
346   // If the visitation order vector is the wrong size, recompute the order.
347   if (VisitOrder.size() != Chain.size()) {
348     unsigned N = size();
349     VisitOrder.clear();
350     VisitOrder.reserve(N);
351 
352     // Record the number of incoming edges for each module. When we
353     // encounter a module with no incoming edges, push it into the queue
354     // to seed the queue.
355     SmallVector<ModuleFile *, 4> Queue;
356     Queue.reserve(N);
357     llvm::SmallVector<unsigned, 4> UnusedIncomingEdges;
358     UnusedIncomingEdges.resize(size());
359     for (ModuleFile &M : llvm::reverse(*this)) {
360       unsigned Size = M.ImportedBy.size();
361       UnusedIncomingEdges[M.Index] = Size;
362       if (!Size)
363         Queue.push_back(&M);
364     }
365 
366     // Traverse the graph, making sure to visit a module before visiting any
367     // of its dependencies.
        // Queue is used as a stack here: elements are taken from the back.
368     while (!Queue.empty()) {
369       ModuleFile *CurrentModule = Queue.pop_back_val();
370       VisitOrder.push_back(CurrentModule);
371 
372       // For any module that this module depends on, push it on the
373       // stack (if it hasn't already been marked as visited).
374       for (ModuleFile *M : llvm::reverse(CurrentModule->Imports)) {
375         // Remove our current module as an impediment to visiting the
376         // module we depend on. If we were the last unvisited module
377         // that depends on this particular module, push it into the
378         // queue to be visited.
379         unsigned &NumUnusedEdges = UnusedIncomingEdges[M->Index];
380         if (NumUnusedEdges && (--NumUnusedEdges == 0))
381           Queue.push_back(M);
382       }
383     }
384 
385     assert(VisitOrder.size() == N && "Visitation order is wrong?");
386 
        // Drop the cached visit states. Presumably they were sized for the
        // previous number of modules (allocateVisitState() sizes new states
        // with size()) -- confirm against the VisitState definition.
387     FirstVisitState = nullptr;
388   }
389 
      // Each traversal gets a fresh visit number; a module counts as visited
      // in this traversal when its slot in State->VisitNumber equals it.
390   auto State = allocateVisitState();
391   unsigned VisitNumber = State->NextVisitNumber++;
392 
393   // If the caller has provided us with a hit-set that came from the global
394   // module index, mark every module file in common with the global module
395   // index that is *not* in that set as 'visited'.
396   if (ModuleFilesHit && !ModulesInCommonWithGlobalIndex.empty()) {
397     for (unsigned I = 0, N = ModulesInCommonWithGlobalIndex.size(); I != N; ++I)
398     {
399       ModuleFile *M = ModulesInCommonWithGlobalIndex[I];
400       if (!ModuleFilesHit->count(M))
401         State->VisitNumber[M->Index] = VisitNumber;
402     }
403   }
404 
405   for (unsigned I = 0, N = VisitOrder.size(); I != N; ++I) {
406     ModuleFile *CurrentModule = VisitOrder[I];
407     // Should we skip this module file?
408     if (State->VisitNumber[CurrentModule->Index] == VisitNumber)
409       continue;
410 
411     // Visit the module.
412     assert(State->VisitNumber[CurrentModule->Index] == VisitNumber - 1);
413     State->VisitNumber[CurrentModule->Index] = VisitNumber;
414     if (!Visitor(*CurrentModule))
415       continue;
416 
417     // The visitor has requested that we cut off visitation of any
418     // module that the current module depends on. To indicate this
419     // behavior, we mark all of the reachable modules as having been visited.
420     ModuleFile *NextModule = CurrentModule;
421     do {
422       // For any module that this module depends on, push it on the
423       // stack (if it hasn't already been marked as visited).
424       for (llvm::SetVector<ModuleFile *>::iterator
425              M = NextModule->Imports.begin(),
426              MEnd = NextModule->Imports.end();
427            M != MEnd; ++M) {
428         if (State->VisitNumber[(*M)->Index] != VisitNumber) {
429           State->Stack.push_back(*M);
430           State->VisitNumber[(*M)->Index] = VisitNumber;
431         }
432       }
433 
434       if (State->Stack.empty())
435         break;
436 
437       // Pop the next module off the stack.
438       NextModule = State->Stack.pop_back_val();
439     } while (true);
440   }
441 
      // Hand the state back so a later traversal can reuse it.
442   returnVisitState(std::move(State));
443 }
444 
445 bool ModuleManager::lookupModuleFile(StringRef FileName, off_t ExpectedSize,
446                                      time_t ExpectedModTime,
447                                      OptionalFileEntryRef &File) {
448   File = std::nullopt;
449   if (FileName == "-")
450     return false;
451 
452   // Open the file immediately to ensure there is no race between stat'ing and
453   // opening the file.
454   OptionalFileEntryRef FileOrErr =
455       expectedToOptional(FileMgr.getFileRef(FileName, /*OpenFile=*/true,
456                                             /*CacheFailure=*/false));
457   if (!FileOrErr)
458     return false;
459 
460   File = *FileOrErr;
461 
462   if ((ExpectedSize && ExpectedSize != File->getSize()) ||
463       (ExpectedModTime && ExpectedModTime != File->getModificationTime()))
464     // Do not destroy File, as it may be referenced. If we need to rebuild it,
465     // it will be destroyed by removeModules.
466     return true;
467 
468   return false;
469 }
470 
471 #ifndef NDEBUG
472 namespace llvm {
473 
// GraphTraits specialisation that exposes a ModuleManager as a graph: the
// nodes are the manager's module files and the children of a node are the
// module files in its Imports set.
474   template<>
475   struct GraphTraits<ModuleManager> {
476     using NodeRef = ModuleFile *;
477     using ChildIteratorType = llvm::SetVector<ModuleFile *>::const_iterator;
478     using nodes_iterator = pointer_iterator<ModuleManager::ModuleConstIterator>;
479 
        // Children of a node: the modules it imports.
480     static ChildIteratorType child_begin(NodeRef Node) {
481       return Node->Imports.begin();
482     }
483 
484     static ChildIteratorType child_end(NodeRef Node) {
485       return Node->Imports.end();
486     }
487 
        // Node range: the manager's begin()/end() iterators, adapted by
        // pointer_iterator to match NodeRef.
488     static nodes_iterator nodes_begin(const ModuleManager &Manager) {
489       return nodes_iterator(Manager.begin());
490     }
491 
492     static nodes_iterator nodes_end(const ModuleManager &Manager) {
493       return nodes_iterator(Manager.end());
494     }
495   };
496 
// DOTGraphTraits specialisation that controls how the module graph is
// rendered: bottom-up, with each node labelled by its module name.
497   template<>
498   struct DOTGraphTraits<ModuleManager> : public DefaultDOTGraphTraits {
499     explicit DOTGraphTraits(bool IsSimple = false)
500         : DefaultDOTGraphTraits(IsSimple) {}
501 
502     static bool renderGraphFromBottomUp() { return true; }
503 
        // The label of a node is the ModuleName of its module file.
504     std::string getNodeLabel(ModuleFile *M, const ModuleManager&) {
505       return M->ModuleName;
506     }
507   };
508 
509 } // namespace llvm
510 
// Passes this manager to llvm::ViewGraph under the title "Modules". Like the
// traits above, this is only compiled when NDEBUG is not defined.
511 void ModuleManager::viewGraph() {
512   llvm::ViewGraph(*this, "Modules");
513 }
514 #endif
515