1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Basic/MakeSupport.h"
12 #include "clang/Frontend/CompilerInstance.h"
13 #include "clang/Lex/Preprocessor.h"
14 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/Support/BLAKE3.h"
17 #include "llvm/Support/StringSaver.h"
18 #include <optional>
19 
20 using namespace clang;
21 using namespace tooling;
22 using namespace dependencies;
23 
getBuildArguments()24 const std::vector<std::string> &ModuleDeps::getBuildArguments() {
25   assert(!std::holds_alternative<std::monostate>(BuildInfo) &&
26          "Using uninitialized ModuleDeps");
27   if (const auto *CI = std::get_if<CowCompilerInvocation>(&BuildInfo))
28     BuildInfo = CI->getCC1CommandLine();
29   return std::get<std::vector<std::string>>(BuildInfo);
30 }
31 
optimizeHeaderSearchOpts(HeaderSearchOptions & Opts,ASTReader & Reader,const serialization::ModuleFile & MF)32 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
33                                      ASTReader &Reader,
34                                      const serialization::ModuleFile &MF) {
35   // Only preserve search paths that were used during the dependency scan.
36   std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
37   Opts.UserEntries.clear();
38 
39   llvm::BitVector SearchPathUsage(Entries.size());
40   llvm::DenseSet<const serialization::ModuleFile *> Visited;
41   std::function<void(const serialization::ModuleFile *)> VisitMF =
42       [&](const serialization::ModuleFile *MF) {
43         SearchPathUsage |= MF->SearchPathUsage;
44         Visited.insert(MF);
45         for (const serialization::ModuleFile *Import : MF->Imports)
46           if (!Visited.contains(Import))
47             VisitMF(Import);
48       };
49   VisitMF(&MF);
50 
51   for (auto Idx : SearchPathUsage.set_bits())
52     Opts.UserEntries.push_back(Entries[Idx]);
53 }
54 
optimizeDiagnosticOpts(DiagnosticOptions & Opts,bool IsSystemModule)55 static void optimizeDiagnosticOpts(DiagnosticOptions &Opts,
56                                    bool IsSystemModule) {
57   // If this is not a system module or -Wsystem-headers was passed, don't
58   // optimize.
59   if (!IsSystemModule)
60     return;
61   bool Wsystem_headers = false;
62   for (StringRef Opt : Opts.Warnings) {
63     bool isPositive = !Opt.consume_front("no-");
64     if (Opt == "system-headers")
65       Wsystem_headers = isPositive;
66   }
67   if (Wsystem_headers)
68     return;
69 
70   // Remove all warning flags. System modules suppress most, but not all,
71   // warnings.
72   Opts.Warnings.clear();
73   Opts.UndefPrefixes.clear();
74   Opts.Remarks.clear();
75 }
76 
splitString(std::string S,char Separator)77 static std::vector<std::string> splitString(std::string S, char Separator) {
78   SmallVector<StringRef> Segments;
79   StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false);
80   std::vector<std::string> Result;
81   Result.reserve(Segments.size());
82   for (StringRef Segment : Segments)
83     Result.push_back(Segment.str());
84   return Result;
85 }
86 
addOutputPaths(CowCompilerInvocation & CI,ModuleDeps & Deps)87 void ModuleDepCollector::addOutputPaths(CowCompilerInvocation &CI,
88                                         ModuleDeps &Deps) {
89   CI.getMutFrontendOpts().OutputFile =
90       Controller.lookupModuleOutput(Deps.ID, ModuleOutputKind::ModuleFile);
91   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
92     CI.getMutDiagnosticOpts().DiagnosticSerializationFile =
93         Controller.lookupModuleOutput(
94             Deps.ID, ModuleOutputKind::DiagnosticSerializationFile);
95   if (!CI.getDependencyOutputOpts().OutputFile.empty()) {
96     CI.getMutDependencyOutputOpts().OutputFile = Controller.lookupModuleOutput(
97         Deps.ID, ModuleOutputKind::DependencyFile);
98     CI.getMutDependencyOutputOpts().Targets =
99         splitString(Controller.lookupModuleOutput(
100                         Deps.ID, ModuleOutputKind::DependencyTargets),
101                     '\0');
102     if (!CI.getDependencyOutputOpts().OutputFile.empty() &&
103         CI.getDependencyOutputOpts().Targets.empty()) {
104       // Fallback to -o as dependency target, as in the driver.
105       SmallString<128> Target;
106       quoteMakeTarget(CI.getFrontendOpts().OutputFile, Target);
107       CI.getMutDependencyOutputOpts().Targets.push_back(std::string(Target));
108     }
109   }
110 }
111 
112 static CowCompilerInvocation
makeCommonInvocationForModuleBuild(CompilerInvocation CI)113 makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
114   CI.resetNonModularOptions();
115   CI.clearImplicitModuleBuildOptions();
116 
117   // Remove options incompatible with explicit module build or are likely to
118   // differ between identical modules discovered from different translation
119   // units.
120   CI.getFrontendOpts().Inputs.clear();
121   CI.getFrontendOpts().OutputFile.clear();
122   // LLVM options are not going to affect the AST
123   CI.getFrontendOpts().LLVMArgs.clear();
124 
125   // TODO: Figure out better way to set options to their default value.
126   CI.getCodeGenOpts().MainFileName.clear();
127   CI.getCodeGenOpts().DwarfDebugFlags.clear();
128   if (!CI.getLangOpts().ModulesCodegen) {
129     CI.getCodeGenOpts().DebugCompilationDir.clear();
130     CI.getCodeGenOpts().CoverageCompilationDir.clear();
131     CI.getCodeGenOpts().CoverageDataFile.clear();
132     CI.getCodeGenOpts().CoverageNotesFile.clear();
133   }
134 
135   // Map output paths that affect behaviour to "-" so their existence is in the
136   // context hash. The final path will be computed in addOutputPaths.
137   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
138     CI.getDiagnosticOpts().DiagnosticSerializationFile = "-";
139   if (!CI.getDependencyOutputOpts().OutputFile.empty())
140     CI.getDependencyOutputOpts().OutputFile = "-";
141   CI.getDependencyOutputOpts().Targets.clear();
142 
143   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
144   CI.getFrontendOpts().ARCMTAction = FrontendOptions::ARCMT_None;
145   CI.getFrontendOpts().ObjCMTAction = FrontendOptions::ObjCMT_None;
146   CI.getFrontendOpts().MTMigrateDir.clear();
147   CI.getLangOpts().ModuleName.clear();
148 
149   // Remove any macro definitions that are explicitly ignored.
150   if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
151     llvm::erase_if(
152         CI.getPreprocessorOpts().Macros,
153         [&CI](const std::pair<std::string, bool> &Def) {
154           StringRef MacroDef = Def.first;
155           return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
156               llvm::CachedHashString(MacroDef.split('=').first));
157         });
158     // Remove the now unused option.
159     CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
160   }
161 
162   return CI;
163 }
164 
/// Builds the compiler invocation for the module described by \p Deps,
/// starting from a copy of \c CommonInvocation, without filling in any output
/// paths.
///
/// The module name, system-module flag, module map input, additional module
/// map files, prebuilt module files and module files of the dependencies are
/// set from \p Deps. \p Optimize is called on the invocation as the last step
/// before it is returned.
CowCompilerInvocation
ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs(
    const ModuleDeps &Deps,
    llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const {
  CowCompilerInvocation CI = CommonInvocation;

  CI.getMutLangOpts().ModuleName = Deps.ID.ModuleName;
  CI.getMutFrontendOpts().IsSystemModule = Deps.IsSystem;

  // Inputs
  // The module map becomes the single input, keeping the language of -x.
  InputKind ModuleMapInputKind(CI.getFrontendOpts().DashX.getLanguage(),
                               InputKind::Format::ModuleMap);
  CI.getMutFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile,
                                              ModuleMapInputKind);

  auto CurrentModuleMapEntry =
      ScanInstance.getFileManager().getFile(Deps.ClangModuleMapFile);
  assert(CurrentModuleMapEntry && "module map file entry not found");

  // Module map files of the direct Clang module dependencies, used by both
  // filters in the loop below.
  auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps);
  for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) {
    // TODO: Track these as `FileEntryRef` to simplify the equality check below.
    auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile);
    assert(ModuleMapEntry && "module map file entry not found");

    // Don't report module maps describing eagerly-loaded dependency. This
    // information will be deserialized from the PCM.
    // TODO: Verify this works fine when modulemap for module A is eagerly
    // loaded from A.pcm, and module map passed on the command line contains
    // definition of a submodule: "explicit module A.Private { ... }".
    if (EagerLoadModules && DepModuleMapFiles.contains(*ModuleMapEntry))
      continue;

    // Don't report module map file of the current module unless it also
    // describes a dependency (for symmetry).
    if (*ModuleMapEntry == *CurrentModuleMapEntry &&
        !DepModuleMapFiles.contains(*ModuleMapEntry))
      continue;

    CI.getMutFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile);
  }

  // Report the prebuilt modules this module uses.
  for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
    CI.getMutFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);

  // Add module file inputs from dependencies.
  addModuleFiles(CI, Deps.ClangModuleDeps);

  if (!CI.getDiagnosticOpts().SystemHeaderWarningsModules.empty()) {
    // Apply -Wsystem-headers-in-module for the current module.
    if (llvm::is_contained(CI.getDiagnosticOpts().SystemHeaderWarningsModules,
                           Deps.ID.ModuleName))
      CI.getMutDiagnosticOpts().Warnings.push_back("system-headers");
    // Remove the now unused option(s).
    CI.getMutDiagnosticOpts().SystemHeaderWarningsModules.clear();
  }

  // Let the caller apply its optimizations once everything else is in place.
  Optimize(CI);

  return CI;
}
227 
collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const228 llvm::DenseSet<const FileEntry *> ModuleDepCollector::collectModuleMapFiles(
229     ArrayRef<ModuleID> ClangModuleDeps) const {
230   llvm::DenseSet<const FileEntry *> ModuleMapFiles;
231   for (const ModuleID &MID : ClangModuleDeps) {
232     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
233     assert(MD && "Inconsistent dependency info");
234     // TODO: Track ClangModuleMapFile as `FileEntryRef`.
235     auto FE = ScanInstance.getFileManager().getFile(MD->ClangModuleMapFile);
236     assert(FE && "Missing module map file that was previously found");
237     ModuleMapFiles.insert(*FE);
238   }
239   return ModuleMapFiles;
240 }
241 
addModuleMapFiles(CompilerInvocation & CI,ArrayRef<ModuleID> ClangModuleDeps) const242 void ModuleDepCollector::addModuleMapFiles(
243     CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
244   if (EagerLoadModules)
245     return; // Only pcm is needed for eager load.
246 
247   for (const ModuleID &MID : ClangModuleDeps) {
248     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
249     assert(MD && "Inconsistent dependency info");
250     CI.getFrontendOpts().ModuleMapFiles.push_back(MD->ClangModuleMapFile);
251   }
252 }
253 
addModuleFiles(CompilerInvocation & CI,ArrayRef<ModuleID> ClangModuleDeps) const254 void ModuleDepCollector::addModuleFiles(
255     CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
256   for (const ModuleID &MID : ClangModuleDeps) {
257     std::string PCMPath =
258         Controller.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
259     if (EagerLoadModules)
260       CI.getFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
261     else
262       CI.getHeaderSearchOpts().PrebuiltModuleFiles.insert(
263           {MID.ModuleName, std::move(PCMPath)});
264   }
265 }
266 
addModuleFiles(CowCompilerInvocation & CI,ArrayRef<ModuleID> ClangModuleDeps) const267 void ModuleDepCollector::addModuleFiles(
268     CowCompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
269   for (const ModuleID &MID : ClangModuleDeps) {
270     std::string PCMPath =
271         Controller.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
272     if (EagerLoadModules)
273       CI.getMutFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
274     else
275       CI.getMutHeaderSearchOpts().PrebuiltModuleFiles.insert(
276           {MID.ModuleName, std::move(PCMPath)});
277   }
278 }
279 
needsModules(FrontendInputFile FIF)280 static bool needsModules(FrontendInputFile FIF) {
281   switch (FIF.getKind().getLanguage()) {
282   case Language::Unknown:
283   case Language::Asm:
284   case Language::LLVM_IR:
285     return false;
286   default:
287     return true;
288   }
289 }
290 
applyDiscoveredDependencies(CompilerInvocation & CI)291 void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
292   CI.clearImplicitModuleBuildOptions();
293 
294   if (llvm::any_of(CI.getFrontendOpts().Inputs, needsModules)) {
295     Preprocessor &PP = ScanInstance.getPreprocessor();
296     if (Module *CurrentModule = PP.getCurrentModuleImplementation())
297       if (OptionalFileEntryRef CurrentModuleMap =
298               PP.getHeaderSearchInfo()
299                   .getModuleMap()
300                   .getModuleMapFileForUniquing(CurrentModule))
301         CI.getFrontendOpts().ModuleMapFiles.emplace_back(
302             CurrentModuleMap->getNameAsRequested());
303 
304     SmallVector<ModuleID> DirectDeps;
305     for (const auto &KV : ModularDeps)
306       if (DirectModularDeps.contains(KV.first))
307         DirectDeps.push_back(KV.second->ID);
308 
309     // TODO: Report module maps the same way it's done for modular dependencies.
310     addModuleMapFiles(CI, DirectDeps);
311 
312     addModuleFiles(CI, DirectDeps);
313 
314     for (const auto &KV : DirectPrebuiltModularDeps)
315       CI.getFrontendOpts().ModuleFiles.push_back(KV.second.PCMFile);
316   }
317 }
318 
/// Computes the context hash of the module described by \p MD when built
/// with \p CI.
///
/// The hash covers, in this order: the compiler and serialization versions,
/// the current working directory of \p VFS (when it can be obtained), the
/// cc1 command line generated from \p CI, the name and context hash of every
/// entry in \p MD.ClangModuleDeps, and \p EagerLoadModules.
///
/// \returns the 128-bit truncated BLAKE3 digest rendered as an unsigned
/// base-36 string.
static std::string getModuleContextHash(const ModuleDeps &MD,
                                        const CowCompilerInvocation &CI,
                                        bool EagerLoadModules,
                                        llvm::vfs::FileSystem &VFS) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
      HashBuilder;
  SmallString<32> Scratch;

  // Hash the compiler version and serialization version to ensure the module
  // will be readable.
  HashBuilder.add(getClangFullRepositoryVersion());
  HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
  // The working directory is only hashed when the VFS can report it.
  llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
  if (CWD)
    HashBuilder.add(*CWD);

  // Hash the BuildInvocation without any input files.
  // Arguments are concatenated into one buffer, each terminated by a NUL.
  SmallString<0> ArgVec;
  ArgVec.reserve(4096);
  CI.generateCC1CommandLine([&](const Twine &Arg) {
    Arg.toVector(ArgVec);
    ArgVec.push_back('\0');
  });
  HashBuilder.add(ArgVec);

  // Hash the module dependencies. These paths may differ even if the invocation
  // is identical if they depend on the contents of the files in the TU -- for
  // example, case-insensitive paths to modulemap files. Usually such a case
  // would indicate a missed optimization to canonicalize, but it may be
  // difficult to canonicalize all cases when there is a VFS.
  for (const auto &ID : MD.ClangModuleDeps) {
    HashBuilder.add(ID.ModuleName);
    HashBuilder.add(ID.ContextHash);
  }

  HashBuilder.add(EagerLoadModules);

  // Reinterpret the 16 digest bytes as two 64-bit words for APInt.
  llvm::BLAKE3Result<16> Hash = HashBuilder.final();
  std::array<uint64_t, 2> Words;
  static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words");
  std::memcpy(Words.data(), Hash.data(), sizeof(Hash));
  return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
}
362 
associateWithContextHash(const CowCompilerInvocation & CI,ModuleDeps & Deps)363 void ModuleDepCollector::associateWithContextHash(
364     const CowCompilerInvocation &CI, ModuleDeps &Deps) {
365   Deps.ID.ContextHash = getModuleContextHash(
366       Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
367   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
368   (void)Inserted;
369   assert(Inserted && "duplicate module mapping");
370 }
371 
LexedFileChanged(FileID FID,LexedFileChangeReason Reason,SrcMgr::CharacteristicKind FileType,FileID PrevFID,SourceLocation Loc)372 void ModuleDepCollectorPP::LexedFileChanged(FileID FID,
373                                             LexedFileChangeReason Reason,
374                                             SrcMgr::CharacteristicKind FileType,
375                                             FileID PrevFID,
376                                             SourceLocation Loc) {
377   if (Reason != LexedFileChangeReason::EnterFile)
378     return;
379 
380   // This has to be delayed as the context hash can change at the start of
381   // `CompilerInstance::ExecuteAction`.
382   if (MDC.ContextHash.empty()) {
383     MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
384     MDC.Consumer.handleContextHash(MDC.ContextHash);
385   }
386 
387   SourceManager &SM = MDC.ScanInstance.getSourceManager();
388 
389   // Dependency generation really does want to go all the way to the
390   // file entry for a source location to find out what is depended on.
391   // We do not want #line markers to affect dependency generation!
392   if (std::optional<StringRef> Filename = SM.getNonBuiltinFilenameForID(FID))
393     MDC.addFileDep(llvm::sys::path::remove_leading_dotslash(*Filename));
394 }
395 
InclusionDirective(SourceLocation HashLoc,const Token & IncludeTok,StringRef FileName,bool IsAngled,CharSourceRange FilenameRange,OptionalFileEntryRef File,StringRef SearchPath,StringRef RelativePath,const Module * Imported,SrcMgr::CharacteristicKind FileType)396 void ModuleDepCollectorPP::InclusionDirective(
397     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
398     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
399     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
400     SrcMgr::CharacteristicKind FileType) {
401   if (!File && !Imported) {
402     // This is a non-modular include that HeaderSearch failed to find. Add it
403     // here as `FileChanged` will never see it.
404     MDC.addFileDep(FileName);
405   }
406   handleImport(Imported);
407 }
408 
moduleImport(SourceLocation ImportLoc,ModuleIdPath Path,const Module * Imported)409 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
410                                         ModuleIdPath Path,
411                                         const Module *Imported) {
412   if (MDC.ScanInstance.getPreprocessor().isInImportingCXXNamedModules()) {
413     P1689ModuleInfo RequiredModule;
414     RequiredModule.ModuleName = Path[0].first->getName().str();
415     RequiredModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule;
416     MDC.RequiredStdCXXModules.push_back(RequiredModule);
417     return;
418   }
419 
420   handleImport(Imported);
421 }
422 
handleImport(const Module * Imported)423 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
424   if (!Imported)
425     return;
426 
427   const Module *TopLevelModule = Imported->getTopLevelModule();
428 
429   if (MDC.isPrebuiltModule(TopLevelModule))
430     MDC.DirectPrebuiltModularDeps.insert(
431         {TopLevelModule, PrebuiltModuleDep{TopLevelModule}});
432   else
433     MDC.DirectModularDeps.insert(TopLevelModule);
434 }
435 
EndOfMainFile()436 void ModuleDepCollectorPP::EndOfMainFile() {
437   FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
438   MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
439                                  .getFileEntryRefForID(MainFileID)
440                                  ->getName());
441 
442   auto &PP = MDC.ScanInstance.getPreprocessor();
443   if (PP.isInNamedModule()) {
444     P1689ModuleInfo ProvidedModule;
445     ProvidedModule.ModuleName = PP.getNamedModuleName();
446     ProvidedModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule;
447     ProvidedModule.IsStdCXXModuleInterface = PP.isInNamedInterfaceUnit();
448     // Don't put implementation (non partition) unit as Provide.
449     // Put the module as required instead. Since the implementation
450     // unit will import the primary module implicitly.
451     if (PP.isInImplementationUnit())
452       MDC.RequiredStdCXXModules.push_back(ProvidedModule);
453     else
454       MDC.ProvidedStdCXXModule = ProvidedModule;
455   }
456 
457   if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
458     MDC.addFileDep(MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
459 
460   for (const Module *M :
461        MDC.ScanInstance.getPreprocessor().getAffectingClangModules())
462     if (!MDC.isPrebuiltModule(M))
463       MDC.DirectModularDeps.insert(M);
464 
465   for (const Module *M : MDC.DirectModularDeps)
466     handleTopLevelModule(M);
467 
468   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
469 
470   if (MDC.IsStdModuleP1689Format)
471     MDC.Consumer.handleProvidedAndRequiredStdCXXModules(
472         MDC.ProvidedStdCXXModule, MDC.RequiredStdCXXModules);
473 
474   for (auto &&I : MDC.ModularDeps)
475     MDC.Consumer.handleModuleDependency(*I.second);
476 
477   for (const Module *M : MDC.DirectModularDeps) {
478     auto It = MDC.ModularDeps.find(M);
479     // Only report direct dependencies that were successfully handled.
480     if (It != MDC.ModularDeps.end())
481       MDC.Consumer.handleDirectModuleDependency(MDC.ModularDeps[M]->ID);
482   }
483 
484   for (auto &&I : MDC.FileDeps)
485     MDC.Consumer.handleFileDependency(I);
486 
487   for (auto &&I : MDC.DirectPrebuiltModularDeps)
488     MDC.Consumer.handlePrebuiltModuleDependency(I.second);
489 }
490 
/// Collects the dependency information of the top-level module \p M and
/// returns its ID.
///
/// Returns an empty optional when \p M has no AST file. When \p M was
/// handled before, the previously computed ID is returned. Otherwise a new
/// \c ModuleDeps entry is created and filled with the module's name, module
/// map, file dependencies, prebuilt and Clang module dependencies (handled
/// recursively), module map file dependencies and build invocation.
std::optional<ModuleID>
ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
  assert(M == M->getTopLevelModule() && "Expected top level module!");

  // A top-level module might not be actually imported as a module when
  // -fmodule-name is used to compile a translation unit that imports this
  // module. In that case it can be skipped. The appropriate header
  // dependencies will still be reported as expected.
  if (!M->getASTFile())
    return {};

  // If this module has been handled already, just return its ID.
  auto ModI = MDC.ModularDeps.insert({M, nullptr});
  if (!ModI.second)
    return ModI.first->second->ID;

  ModI.first->second = std::make_unique<ModuleDeps>();
  ModuleDeps &MD = *ModI.first->second;

  MD.ID.ModuleName = M->getFullModuleName();
  MD.IsSystem = M->IsSystem;

  ModuleMap &ModMapInfo =
      MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();

  OptionalFileEntryRef ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M);

  if (ModuleMap) {
    SmallString<128> Path = ModuleMap->getNameAsRequested();
    ModMapInfo.canonicalizeModuleMapPath(Path);
    MD.ClangModuleMapFile = std::string(Path);
  }

  // First pass over the module file's inputs: record file dependencies.
  serialization::ModuleFile *MF =
      MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
          *M->getASTFile());
  MDC.ScanInstance.getASTReader()->visitInputFileInfos(
      *MF, /*IncludeSystem=*/true,
      [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
        // __inferred_module.map is the result of the way in which an implicit
        // module build handles inferred modules. It adds an overlay VFS with
        // this file in the proper directory and relies on the rest of Clang to
        // handle it like normal. With explicitly built modules we don't need
        // to play VFS tricks, so replace it with the correct module map.
        if (StringRef(IFI.Filename).ends_with("__inferred_module.map")) {
          // NOTE(review): ModuleMap is dereferenced without the check used
          // above; presumably it is always set for inferred modules -- confirm.
          MDC.addFileDep(MD, ModuleMap->getName());
          return;
        }
        MDC.addFileDep(MD, IFI.Filename);
      });

  // The three walks below share one set so that each dependency is recorded
  // at most once.
  llvm::DenseSet<const Module *> SeenDeps;
  addAllSubmodulePrebuiltDeps(M, MD, SeenDeps);
  addAllSubmoduleDeps(M, MD, SeenDeps);
  addAllAffectingClangModules(M, MD, SeenDeps);

  // Second pass over the inputs: record the top-level module map files,
  // skipping the synthetic __inferred_module.map.
  MDC.ScanInstance.getASTReader()->visitInputFileInfos(
      *MF, /*IncludeSystem=*/true,
      [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
        if (!(IFI.TopLevel && IFI.ModuleMap))
          return;
        if (StringRef(IFI.FilenameAsRequested)
                .ends_with("__inferred_module.map"))
          return;
        MD.ModuleMapFileDeps.emplace_back(IFI.FilenameAsRequested);
      });

  // Build the invocation, applying the optimizations enabled in OptimizeArgs.
  CowCompilerInvocation CI =
      MDC.getInvocationAdjustedForModuleBuildWithoutOutputs(
          MD, [&](CowCompilerInvocation &BuildInvocation) {
            if (any(MDC.OptimizeArgs & ScanningOptimizations::HeaderSearch))
              optimizeHeaderSearchOpts(BuildInvocation.getMutHeaderSearchOpts(),
                                       *MDC.ScanInstance.getASTReader(), *MF);
            if (any(MDC.OptimizeArgs & ScanningOptimizations::SystemWarnings))
              optimizeDiagnosticOpts(
                  BuildInvocation.getMutDiagnosticOpts(),
                  BuildInvocation.getFrontendOpts().IsSystemModule);
          });

  MDC.associateWithContextHash(CI, MD);

  // Finish the compiler invocation. Requires dependencies and the context hash.
  MDC.addOutputPaths(CI, MD);

  MD.BuildInfo = std::move(CI);

  return MD.ID;
}
579 
forEachSubmoduleSorted(const Module * M,llvm::function_ref<void (const Module *)> F)580 static void forEachSubmoduleSorted(const Module *M,
581                                    llvm::function_ref<void(const Module *)> F) {
582   // Submodule order depends on order of header includes for inferred submodules
583   // we don't care about the exact order, so sort so that it's consistent across
584   // TUs to improve sharing.
585   SmallVector<const Module *> Submodules(M->submodules());
586   llvm::stable_sort(Submodules, [](const Module *A, const Module *B) {
587     return A->Name < B->Name;
588   });
589   for (const Module *SubM : Submodules)
590     F(SubM);
591 }
592 
addAllSubmodulePrebuiltDeps(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & SeenSubmodules)593 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
594     const Module *M, ModuleDeps &MD,
595     llvm::DenseSet<const Module *> &SeenSubmodules) {
596   addModulePrebuiltDeps(M, MD, SeenSubmodules);
597 
598   forEachSubmoduleSorted(M, [&](const Module *SubM) {
599     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
600   });
601 }
602 
addModulePrebuiltDeps(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & SeenSubmodules)603 void ModuleDepCollectorPP::addModulePrebuiltDeps(
604     const Module *M, ModuleDeps &MD,
605     llvm::DenseSet<const Module *> &SeenSubmodules) {
606   for (const Module *Import : M->Imports)
607     if (Import->getTopLevelModule() != M->getTopLevelModule())
608       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
609         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
610           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
611 }
612 
addAllSubmoduleDeps(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & AddedModules)613 void ModuleDepCollectorPP::addAllSubmoduleDeps(
614     const Module *M, ModuleDeps &MD,
615     llvm::DenseSet<const Module *> &AddedModules) {
616   addModuleDep(M, MD, AddedModules);
617 
618   forEachSubmoduleSorted(M, [&](const Module *SubM) {
619     addAllSubmoduleDeps(SubM, MD, AddedModules);
620   });
621 }
622 
addModuleDep(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & AddedModules)623 void ModuleDepCollectorPP::addModuleDep(
624     const Module *M, ModuleDeps &MD,
625     llvm::DenseSet<const Module *> &AddedModules) {
626   for (const Module *Import : M->Imports) {
627     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
628         !MDC.isPrebuiltModule(Import)) {
629       if (auto ImportID = handleTopLevelModule(Import->getTopLevelModule()))
630         if (AddedModules.insert(Import->getTopLevelModule()).second)
631           MD.ClangModuleDeps.push_back(*ImportID);
632     }
633   }
634 }
635 
addAllAffectingClangModules(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & AddedModules)636 void ModuleDepCollectorPP::addAllAffectingClangModules(
637     const Module *M, ModuleDeps &MD,
638     llvm::DenseSet<const Module *> &AddedModules) {
639   addAffectingClangModule(M, MD, AddedModules);
640 
641   for (const Module *SubM : M->submodules())
642     addAllAffectingClangModules(SubM, MD, AddedModules);
643 }
644 
addAffectingClangModule(const Module * M,ModuleDeps & MD,llvm::DenseSet<const Module * > & AddedModules)645 void ModuleDepCollectorPP::addAffectingClangModule(
646     const Module *M, ModuleDeps &MD,
647     llvm::DenseSet<const Module *> &AddedModules) {
648   for (const Module *Affecting : M->AffectingClangModules) {
649     assert(Affecting == Affecting->getTopLevelModule() &&
650            "Not quite import not top-level module");
651     if (Affecting != M->getTopLevelModule() &&
652         !MDC.isPrebuiltModule(Affecting)) {
653       if (auto ImportID = handleTopLevelModule(Affecting))
654         if (AddedModules.insert(Affecting).second)
655           MD.ClangModuleDeps.push_back(*ImportID);
656     }
657   }
658 }
659 
/// Constructs a collector that reports to \p C and queries \p Controller.
///
/// Takes ownership of \p Opts. \p OriginalCI is consumed and turned into the
/// common invocation for module builds via
/// makeCommonInvocationForModuleBuild(). The remaining arguments are stored
/// as given.
ModuleDepCollector::ModuleDepCollector(
    std::unique_ptr<DependencyOutputOptions> Opts,
    CompilerInstance &ScanInstance, DependencyConsumer &C,
    DependencyActionController &Controller, CompilerInvocation OriginalCI,
    ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
    bool IsStdModuleP1689Format)
    : ScanInstance(ScanInstance), Consumer(C), Controller(Controller),
      Opts(std::move(Opts)),
      CommonInvocation(
          makeCommonInvocationForModuleBuild(std::move(OriginalCI))),
      OptimizeArgs(OptimizeArgs), EagerLoadModules(EagerLoadModules),
      IsStdModuleP1689Format(IsStdModuleP1689Format) {}
672 
attachToPreprocessor(Preprocessor & PP)673 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
674   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
675 }
676 
/// No-op: this collector does not register anything with the AST reader
/// \p R.
void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
678 
isPrebuiltModule(const Module * M)679 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
680   std::string Name(M->getTopLevelModuleName());
681   const auto &PrebuiltModuleFiles =
682       ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
683   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
684   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
685     return false;
686   assert("Prebuilt module came from the expected AST file" &&
687          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
688   return true;
689 }
690 
makeAbsoluteAndPreferred(CompilerInstance & CI,StringRef Path,SmallVectorImpl<char> & Storage)691 static StringRef makeAbsoluteAndPreferred(CompilerInstance &CI, StringRef Path,
692                                           SmallVectorImpl<char> &Storage) {
693   if (llvm::sys::path::is_absolute(Path) &&
694       !llvm::sys::path::is_style_windows(llvm::sys::path::Style::native))
695     return Path;
696   Storage.assign(Path.begin(), Path.end());
697   CI.getFileManager().makeAbsolutePath(Storage);
698   llvm::sys::path::make_preferred(Storage);
699   return StringRef(Storage.data(), Storage.size());
700 }
701 
addFileDep(StringRef Path)702 void ModuleDepCollector::addFileDep(StringRef Path) {
703   if (IsStdModuleP1689Format) {
704     // Within P1689 format, we don't want all the paths to be absolute path
705     // since it may violate the tranditional make style dependencies info.
706     FileDeps.push_back(std::string(Path));
707     return;
708   }
709 
710   llvm::SmallString<256> Storage;
711   Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
712   FileDeps.push_back(std::string(Path));
713 }
714 
addFileDep(ModuleDeps & MD,StringRef Path)715 void ModuleDepCollector::addFileDep(ModuleDeps &MD, StringRef Path) {
716   if (IsStdModuleP1689Format) {
717     MD.FileDeps.insert(Path);
718     return;
719   }
720 
721   llvm::SmallString<256> Storage;
722   Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
723   MD.FileDeps.insert(Path);
724 }
725