1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Basic/MakeSupport.h"
12 #include "clang/Frontend/CompilerInstance.h"
13 #include "clang/Lex/Preprocessor.h"
14 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
15 #include "llvm/Support/StringSaver.h"
16 
17 using namespace clang;
18 using namespace tooling;
19 using namespace dependencies;
20 
21 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
22                                      ASTReader &Reader,
23                                      const serialization::ModuleFile &MF) {
24   // Only preserve search paths that were used during the dependency scan.
25   std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
26   Opts.UserEntries.clear();
27 
28   llvm::BitVector SearchPathUsage(Entries.size());
29   llvm::DenseSet<const serialization::ModuleFile *> Visited;
30   std::function<void(const serialization::ModuleFile *)> VisitMF =
31       [&](const serialization::ModuleFile *MF) {
32         SearchPathUsage |= MF->SearchPathUsage;
33         Visited.insert(MF);
34         for (const serialization::ModuleFile *Import : MF->Imports)
35           if (!Visited.contains(Import))
36             VisitMF(Import);
37       };
38   VisitMF(&MF);
39 
40   for (auto Idx : SearchPathUsage.set_bits())
41     Opts.UserEntries.push_back(Entries[Idx]);
42 }
43 
44 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
45     const ModuleDeps &Deps,
46     llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
47   // Make a deep copy of the original Clang invocation.
48   CompilerInvocation CI(OriginalInvocation);
49 
50   CI.getLangOpts()->resetNonModularOptions();
51   CI.getPreprocessorOpts().resetNonModularOptions();
52 
53   // Remove options incompatible with explicit module build or are likely to
54   // differ between identical modules discovered from different translation
55   // units.
56   CI.getFrontendOpts().Inputs.clear();
57   CI.getFrontendOpts().OutputFile.clear();
58   CI.getCodeGenOpts().MainFileName.clear();
59   CI.getCodeGenOpts().DwarfDebugFlags.clear();
60   CI.getDiagnosticOpts().DiagnosticSerializationFile.clear();
61   CI.getDependencyOutputOpts().OutputFile.clear();
62   CI.getDependencyOutputOpts().Targets.clear();
63 
64   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
65   CI.getLangOpts()->ModuleName = Deps.ID.ModuleName;
66   CI.getFrontendOpts().IsSystemModule = Deps.IsSystem;
67 
68   // Disable implicit modules and canonicalize options that are only used by
69   // implicit modules.
70   CI.getLangOpts()->ImplicitModules = false;
71   CI.getHeaderSearchOpts().ImplicitModuleMaps = false;
72   CI.getHeaderSearchOpts().ModuleCachePath.clear();
73   CI.getHeaderSearchOpts().ModulesValidateOncePerBuildSession = false;
74   CI.getHeaderSearchOpts().BuildSessionTimestamp = 0;
75   // The specific values we canonicalize to for pruning don't affect behaviour,
76   /// so use the default values so they will be dropped from the command-line.
77   CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60;
78   CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60;
79 
80   // Report the prebuilt modules this module uses.
81   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
82     CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
83 
84   CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps;
85 
86   Optimize(CI);
87 
88   // The original invocation probably didn't have strict context hash enabled.
89   // We will use the context hash of this invocation to distinguish between
90   // multiple incompatible versions of the same module and will use it when
91   // reporting dependencies to the clients. Let's make sure we're using
92   // **strict** context hash in order to prevent accidental sharing of
93   // incompatible modules (e.g. with differences in search paths).
94   CI.getHeaderSearchOpts().ModulesStrictContextHash = true;
95 
96   return CI;
97 }
98 
99 static std::vector<std::string>
100 serializeCompilerInvocation(const CompilerInvocation &CI) {
101   // Set up string allocator.
102   llvm::BumpPtrAllocator Alloc;
103   llvm::StringSaver Strings(Alloc);
104   auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); };
105 
106   // Synthesize full command line from the CompilerInvocation, including "-cc1".
107   SmallVector<const char *, 32> Args{"-cc1"};
108   CI.generateCC1CommandLine(Args, SA);
109 
110   // Convert arguments to the return type.
111   return std::vector<std::string>{Args.begin(), Args.end()};
112 }
113 
114 static std::vector<std::string> splitString(std::string S, char Separator) {
115   SmallVector<StringRef> Segments;
116   StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false);
117   std::vector<std::string> Result;
118   Result.reserve(Segments.size());
119   for (StringRef Segment : Segments)
120     Result.push_back(Segment.str());
121   return Result;
122 }
123 
124 std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
125     llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
126         LookupModuleOutput) const {
127   CompilerInvocation CI(BuildInvocation);
128   FrontendOptions &FrontendOpts = CI.getFrontendOpts();
129 
130   InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
131                                InputKind::Format::ModuleMap);
132   FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind);
133   FrontendOpts.OutputFile =
134       LookupModuleOutput(ID, ModuleOutputKind::ModuleFile);
135   if (HadSerializedDiagnostics)
136     CI.getDiagnosticOpts().DiagnosticSerializationFile =
137         LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile);
138   if (HadDependencyFile) {
139     DependencyOutputOptions &DepOpts = CI.getDependencyOutputOpts();
140     DepOpts.OutputFile =
141         LookupModuleOutput(ID, ModuleOutputKind::DependencyFile);
142     DepOpts.Targets = splitString(
143         LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0');
144     if (!DepOpts.OutputFile.empty() && DepOpts.Targets.empty()) {
145       // Fallback to -o as dependency target, as in the driver.
146       SmallString<128> Target;
147       quoteMakeTarget(FrontendOpts.OutputFile, Target);
148       DepOpts.Targets.push_back(std::string(Target));
149     }
150   }
151 
152   for (ModuleID MID : ClangModuleDeps)
153     FrontendOpts.ModuleFiles.push_back(
154         LookupModuleOutput(MID, ModuleOutputKind::ModuleFile));
155 
156   return serializeCompilerInvocation(CI);
157 }
158 
159 std::vector<std::string>
160 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
161   return serializeCompilerInvocation(BuildInvocation);
162 }
163 
164 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
165                                        FileChangeReason Reason,
166                                        SrcMgr::CharacteristicKind FileType,
167                                        FileID PrevFID) {
168   if (Reason != PPCallbacks::EnterFile)
169     return;
170 
171   // This has to be delayed as the context hash can change at the start of
172   // `CompilerInstance::ExecuteAction`.
173   if (MDC.ContextHash.empty()) {
174     MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
175     MDC.Consumer.handleContextHash(MDC.ContextHash);
176   }
177 
178   SourceManager &SM = MDC.ScanInstance.getSourceManager();
179 
180   // Dependency generation really does want to go all the way to the
181   // file entry for a source location to find out what is depended on.
182   // We do not want #line markers to affect dependency generation!
183   if (Optional<StringRef> Filename =
184           SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc))))
185     MDC.FileDeps.push_back(
186         std::string(llvm::sys::path::remove_leading_dotslash(*Filename)));
187 }
188 
189 void ModuleDepCollectorPP::InclusionDirective(
190     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
191     bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File,
192     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
193     SrcMgr::CharacteristicKind FileType) {
194   if (!File && !Imported) {
195     // This is a non-modular include that HeaderSearch failed to find. Add it
196     // here as `FileChanged` will never see it.
197     MDC.FileDeps.push_back(std::string(FileName));
198   }
199   handleImport(Imported);
200 }
201 
202 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
203                                         ModuleIdPath Path,
204                                         const Module *Imported) {
205   handleImport(Imported);
206 }
207 
208 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
209   if (!Imported)
210     return;
211 
212   const Module *TopLevelModule = Imported->getTopLevelModule();
213 
214   if (MDC.isPrebuiltModule(TopLevelModule))
215     DirectPrebuiltModularDeps.insert(TopLevelModule);
216   else
217     DirectModularDeps.insert(TopLevelModule);
218 }
219 
220 void ModuleDepCollectorPP::EndOfMainFile() {
221   FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
222   MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
223                                  .getFileEntryForID(MainFileID)
224                                  ->getName());
225 
226   if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
227     MDC.FileDeps.push_back(
228         MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
229 
230   for (const Module *M : DirectModularDeps) {
231     // A top-level module might not be actually imported as a module when
232     // -fmodule-name is used to compile a translation unit that imports this
233     // module. In that case it can be skipped. The appropriate header
234     // dependencies will still be reported as expected.
235     if (!M->getASTFile())
236       continue;
237     handleTopLevelModule(M);
238   }
239 
240   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
241 
242   for (auto &&I : MDC.ModularDeps)
243     MDC.Consumer.handleModuleDependency(*I.second);
244 
245   for (auto &&I : MDC.FileDeps)
246     MDC.Consumer.handleFileDependency(I);
247 
248   for (auto &&I : DirectPrebuiltModularDeps)
249     MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I});
250 }
251 
252 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
253   assert(M == M->getTopLevelModule() && "Expected top level module!");
254 
255   // If this module has been handled already, just return its ID.
256   auto ModI = MDC.ModularDeps.insert({M, nullptr});
257   if (!ModI.second)
258     return ModI.first->second->ID;
259 
260   ModI.first->second = std::make_unique<ModuleDeps>();
261   ModuleDeps &MD = *ModI.first->second;
262 
263   MD.ID.ModuleName = M->getFullModuleName();
264   MD.ImportedByMainFile = DirectModularDeps.contains(M);
265   MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
266   MD.IsSystem = M->IsSystem;
267 
268   const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor()
269                                    .getHeaderSearchInfo()
270                                    .getModuleMap()
271                                    .getModuleMapFileForUniquing(M);
272 
273   if (ModuleMap) {
274     StringRef Path = ModuleMap->tryGetRealPathName();
275     if (Path.empty())
276       Path = ModuleMap->getName();
277     MD.ClangModuleMapFile = std::string(Path);
278   }
279 
280   serialization::ModuleFile *MF =
281       MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
282           M->getASTFile());
283   MDC.ScanInstance.getASTReader()->visitInputFiles(
284       *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
285         // __inferred_module.map is the result of the way in which an implicit
286         // module build handles inferred modules. It adds an overlay VFS with
287         // this file in the proper directory and relies on the rest of Clang to
288         // handle it like normal. With explicitly built modules we don't need
289         // to play VFS tricks, so replace it with the correct module map.
290         if (IF.getFile()->getName().endswith("__inferred_module.map")) {
291           MD.FileDeps.insert(ModuleMap->getName());
292           return;
293         }
294         MD.FileDeps.insert(IF.getFile()->getName());
295       });
296 
297   // We usually don't need to list the module map files of our dependencies when
298   // building a module explicitly: their semantics will be deserialized from PCM
299   // files.
300   //
301   // However, some module maps loaded implicitly during the dependency scan can
302   // describe anti-dependencies. That happens when this module, let's call it
303   // M1, is marked as '[no_undeclared_includes]' and tries to access a header
304   // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;'
305   // declaration. The explicit build needs the module map for M2 so that it
306   // knows that textually including "M2/M2.h" is not allowed.
307   // E.g., '__has_include("M2/M2.h")' should return false, but without M2's
308   // module map the explicit build would return true.
309   //
310   // An alternative approach would be to tell the explicit build what its
311   // textual dependencies are, instead of having it re-discover its
312   // anti-dependencies. For example, we could create and use an `-ivfs-overlay`
313   // with `fall-through: false` that explicitly listed the dependencies.
314   // However, that's more complicated to implement and harder to reason about.
315   if (M->NoUndeclaredIncludes) {
316     // We don't have a good way to determine which module map described the
317     // anti-dependency (let alone what's the corresponding top-level module
318     // map). We simply specify all the module maps in the order they were loaded
319     // during the implicit build during scan.
320     // TODO: Resolve this by serializing and only using Module::UndeclaredUses.
321     MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
322         *MF, [&](const FileEntry *FE) {
323           if (FE->getName().endswith("__inferred_module.map"))
324             return;
325           // The top-level modulemap of this module will be the input file. We
326           // don't need to specify it as a module map.
327           if (FE == ModuleMap)
328             return;
329           MD.ModuleMapFileDeps.push_back(FE->getName().str());
330         });
331   }
332 
333   // Add direct prebuilt module dependencies now, so that we can use them when
334   // creating a CompilerInvocation and computing context hash for this
335   // ModuleDeps instance.
336   llvm::DenseSet<const Module *> SeenModules;
337   addAllSubmodulePrebuiltDeps(M, MD, SeenModules);
338 
339   MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths(
340       MD, [&](CompilerInvocation &BuildInvocation) {
341         if (MDC.OptimizeArgs)
342           optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
343                                    *MDC.ScanInstance.getASTReader(), *MF);
344       });
345   MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts()
346                                      .DiagnosticSerializationFile.empty();
347   MD.HadDependencyFile =
348       !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty();
349   // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in
350   // the context hash since it can affect the command-line.
351   MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
352 
353   llvm::DenseSet<const Module *> AddedModules;
354   addAllSubmoduleDeps(M, MD, AddedModules);
355 
356   return MD.ID;
357 }
358 
359 static void forEachSubmoduleSorted(const Module *M,
360                                    llvm::function_ref<void(const Module *)> F) {
361   // Submodule order depends on order of header includes for inferred submodules
362   // we don't care about the exact order, so sort so that it's consistent across
363   // TUs to improve sharing.
364   SmallVector<const Module *> Submodules(M->submodule_begin(),
365                                          M->submodule_end());
366   llvm::stable_sort(Submodules, [](const Module *A, const Module *B) {
367     return A->Name < B->Name;
368   });
369   for (const Module *SubM : Submodules)
370     F(SubM);
371 }
372 
373 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
374     const Module *M, ModuleDeps &MD,
375     llvm::DenseSet<const Module *> &SeenSubmodules) {
376   addModulePrebuiltDeps(M, MD, SeenSubmodules);
377 
378   forEachSubmoduleSorted(M, [&](const Module *SubM) {
379     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
380   });
381 }
382 
383 void ModuleDepCollectorPP::addModulePrebuiltDeps(
384     const Module *M, ModuleDeps &MD,
385     llvm::DenseSet<const Module *> &SeenSubmodules) {
386   for (const Module *Import : M->Imports)
387     if (Import->getTopLevelModule() != M->getTopLevelModule())
388       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
389         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
390           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
391 }
392 
393 void ModuleDepCollectorPP::addAllSubmoduleDeps(
394     const Module *M, ModuleDeps &MD,
395     llvm::DenseSet<const Module *> &AddedModules) {
396   addModuleDep(M, MD, AddedModules);
397 
398   forEachSubmoduleSorted(M, [&](const Module *SubM) {
399     addAllSubmoduleDeps(SubM, MD, AddedModules);
400   });
401 }
402 
403 void ModuleDepCollectorPP::addModuleDep(
404     const Module *M, ModuleDeps &MD,
405     llvm::DenseSet<const Module *> &AddedModules) {
406   for (const Module *Import : M->Imports) {
407     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
408         !MDC.isPrebuiltModule(Import)) {
409       ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule());
410       if (AddedModules.insert(Import->getTopLevelModule()).second)
411         MD.ClangModuleDeps.push_back(ImportID);
412     }
413   }
414 }
415 
416 ModuleDepCollector::ModuleDepCollector(
417     std::unique_ptr<DependencyOutputOptions> Opts,
418     CompilerInstance &ScanInstance, DependencyConsumer &C,
419     CompilerInvocation &&OriginalCI, bool OptimizeArgs)
420     : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)),
421       OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {}
422 
423 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
424   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
425 }
426 
427 void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
428 
429 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
430   std::string Name(M->getTopLevelModuleName());
431   const auto &PrebuiltModuleFiles =
432       ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
433   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
434   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
435     return false;
436   assert("Prebuilt module came from the expected AST file" &&
437          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
438   return true;
439 }
440