1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Basic/MakeSupport.h"
12 #include "clang/Frontend/CompilerInstance.h"
13 #include "clang/Lex/Preprocessor.h"
14 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
15 #include "llvm/Support/BLAKE3.h"
16 #include "llvm/Support/StringSaver.h"
17 #include <optional>
18 
19 using namespace clang;
20 using namespace tooling;
21 using namespace dependencies;
22 
23 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
24                                      ASTReader &Reader,
25                                      const serialization::ModuleFile &MF) {
26   // Only preserve search paths that were used during the dependency scan.
27   std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
28   Opts.UserEntries.clear();
29 
30   llvm::BitVector SearchPathUsage(Entries.size());
31   llvm::DenseSet<const serialization::ModuleFile *> Visited;
32   std::function<void(const serialization::ModuleFile *)> VisitMF =
33       [&](const serialization::ModuleFile *MF) {
34         SearchPathUsage |= MF->SearchPathUsage;
35         Visited.insert(MF);
36         for (const serialization::ModuleFile *Import : MF->Imports)
37           if (!Visited.contains(Import))
38             VisitMF(Import);
39       };
40   VisitMF(&MF);
41 
42   for (auto Idx : SearchPathUsage.set_bits())
43     Opts.UserEntries.push_back(Entries[Idx]);
44 }
45 
46 static std::vector<std::string> splitString(std::string S, char Separator) {
47   SmallVector<StringRef> Segments;
48   StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false);
49   std::vector<std::string> Result;
50   Result.reserve(Segments.size());
51   for (StringRef Segment : Segments)
52     Result.push_back(Segment.str());
53   return Result;
54 }
55 
56 void ModuleDepCollector::addOutputPaths(CompilerInvocation &CI,
57                                         ModuleDeps &Deps) {
58   CI.getFrontendOpts().OutputFile =
59       Consumer.lookupModuleOutput(Deps.ID, ModuleOutputKind::ModuleFile);
60   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
61     CI.getDiagnosticOpts().DiagnosticSerializationFile =
62         Consumer.lookupModuleOutput(
63             Deps.ID, ModuleOutputKind::DiagnosticSerializationFile);
64   if (!CI.getDependencyOutputOpts().OutputFile.empty()) {
65     CI.getDependencyOutputOpts().OutputFile =
66         Consumer.lookupModuleOutput(Deps.ID, ModuleOutputKind::DependencyFile);
67     CI.getDependencyOutputOpts().Targets =
68         splitString(Consumer.lookupModuleOutput(
69                         Deps.ID, ModuleOutputKind::DependencyTargets),
70                     '\0');
71     if (!CI.getDependencyOutputOpts().OutputFile.empty() &&
72         CI.getDependencyOutputOpts().Targets.empty()) {
73       // Fallback to -o as dependency target, as in the driver.
74       SmallString<128> Target;
75       quoteMakeTarget(CI.getFrontendOpts().OutputFile, Target);
76       CI.getDependencyOutputOpts().Targets.push_back(std::string(Target));
77     }
78   }
79 }
80 
81 CompilerInvocation
82 ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs(
83     const ModuleDeps &Deps,
84     llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
85   // Make a deep copy of the original Clang invocation.
86   CompilerInvocation CI(OriginalInvocation);
87 
88   CI.resetNonModularOptions();
89   CI.clearImplicitModuleBuildOptions();
90 
91   // Remove options incompatible with explicit module build or are likely to
92   // differ between identical modules discovered from different translation
93   // units.
94   CI.getFrontendOpts().Inputs.clear();
95   CI.getFrontendOpts().OutputFile.clear();
96 
97   // TODO: Figure out better way to set options to their default value.
98   CI.getCodeGenOpts().MainFileName.clear();
99   CI.getCodeGenOpts().DwarfDebugFlags.clear();
100   if (!CI.getLangOpts()->ModulesCodegen) {
101     CI.getCodeGenOpts().DebugCompilationDir.clear();
102     CI.getCodeGenOpts().CoverageCompilationDir.clear();
103   }
104 
105   // Map output paths that affect behaviour to "-" so their existence is in the
106   // context hash. The final path will be computed in addOutputPaths.
107   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
108     CI.getDiagnosticOpts().DiagnosticSerializationFile = "-";
109   if (!CI.getDependencyOutputOpts().OutputFile.empty())
110     CI.getDependencyOutputOpts().OutputFile = "-";
111   CI.getDependencyOutputOpts().Targets.clear();
112 
113   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
114   CI.getLangOpts()->ModuleName = Deps.ID.ModuleName;
115   CI.getFrontendOpts().IsSystemModule = Deps.IsSystem;
116 
117   // Inputs
118   InputKind ModuleMapInputKind(CI.getFrontendOpts().DashX.getLanguage(),
119                                InputKind::Format::ModuleMap);
120   CI.getFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile,
121                                            ModuleMapInputKind);
122 
123   auto CurrentModuleMapEntry =
124       ScanInstance.getFileManager().getFile(Deps.ClangModuleMapFile);
125   assert(CurrentModuleMapEntry && "module map file entry not found");
126 
127   auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps);
128   for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) {
129     // TODO: Track these as `FileEntryRef` to simplify the equality check below.
130     auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile);
131     assert(ModuleMapEntry && "module map file entry not found");
132 
133     // Don't report module maps describing eagerly-loaded dependency. This
134     // information will be deserialized from the PCM.
135     // TODO: Verify this works fine when modulemap for module A is eagerly
136     // loaded from A.pcm, and module map passed on the command line contains
137     // definition of a submodule: "explicit module A.Private { ... }".
138     if (EagerLoadModules && DepModuleMapFiles.contains(*ModuleMapEntry))
139       continue;
140 
141     // Don't report module map file of the current module unless it also
142     // describes a dependency (for symmetry).
143     if (*ModuleMapEntry == *CurrentModuleMapEntry &&
144         !DepModuleMapFiles.contains(*ModuleMapEntry))
145       continue;
146 
147     CI.getFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile);
148   }
149 
150   // Report the prebuilt modules this module uses.
151   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
152     CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
153 
154   // Add module file inputs from dependencies.
155   addModuleFiles(CI, Deps.ClangModuleDeps);
156 
157   // Remove any macro definitions that are explicitly ignored.
158   if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
159     llvm::erase_if(
160         CI.getPreprocessorOpts().Macros,
161         [&CI](const std::pair<std::string, bool> &Def) {
162           StringRef MacroDef = Def.first;
163           return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
164               llvm::CachedHashString(MacroDef.split('=').first));
165         });
166     // Remove the now unused option.
167     CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
168   }
169 
170   Optimize(CI);
171 
172   return CI;
173 }
174 
175 llvm::DenseSet<const FileEntry *> ModuleDepCollector::collectModuleMapFiles(
176     ArrayRef<ModuleID> ClangModuleDeps) const {
177   llvm::DenseSet<const FileEntry *> ModuleMapFiles;
178   for (const ModuleID &MID : ClangModuleDeps) {
179     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
180     assert(MD && "Inconsistent dependency info");
181     // TODO: Track ClangModuleMapFile as `FileEntryRef`.
182     auto FE = ScanInstance.getFileManager().getFile(MD->ClangModuleMapFile);
183     assert(FE && "Missing module map file that was previously found");
184     ModuleMapFiles.insert(*FE);
185   }
186   return ModuleMapFiles;
187 }
188 
189 void ModuleDepCollector::addModuleMapFiles(
190     CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
191   if (EagerLoadModules)
192     return; // Only pcm is needed for eager load.
193 
194   for (const ModuleID &MID : ClangModuleDeps) {
195     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
196     assert(MD && "Inconsistent dependency info");
197     CI.getFrontendOpts().ModuleMapFiles.push_back(MD->ClangModuleMapFile);
198   }
199 }
200 
201 void ModuleDepCollector::addModuleFiles(
202     CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
203   for (const ModuleID &MID : ClangModuleDeps) {
204     std::string PCMPath =
205         Consumer.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
206     if (EagerLoadModules)
207       CI.getFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
208     else
209       CI.getHeaderSearchOpts().PrebuiltModuleFiles.insert(
210           {MID.ModuleName, std::move(PCMPath)});
211   }
212 }
213 
214 static bool needsModules(FrontendInputFile FIF) {
215   switch (FIF.getKind().getLanguage()) {
216   case Language::Unknown:
217   case Language::Asm:
218   case Language::LLVM_IR:
219     return false;
220   default:
221     return true;
222   }
223 }
224 
225 void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
226   CI.clearImplicitModuleBuildOptions();
227 
228   if (llvm::any_of(CI.getFrontendOpts().Inputs, needsModules)) {
229     Preprocessor &PP = ScanInstance.getPreprocessor();
230     if (Module *CurrentModule = PP.getCurrentModuleImplementation())
231       if (OptionalFileEntryRef CurrentModuleMap =
232               PP.getHeaderSearchInfo()
233                   .getModuleMap()
234                   .getModuleMapFileForUniquing(CurrentModule))
235         CI.getFrontendOpts().ModuleMapFiles.emplace_back(
236             CurrentModuleMap->getName());
237 
238     SmallVector<ModuleID> DirectDeps;
239     for (const auto &KV : ModularDeps)
240       if (KV.second->ImportedByMainFile)
241         DirectDeps.push_back(KV.second->ID);
242 
243     // TODO: Report module maps the same way it's done for modular dependencies.
244     addModuleMapFiles(CI, DirectDeps);
245 
246     addModuleFiles(CI, DirectDeps);
247 
248     for (const auto &KV : DirectPrebuiltModularDeps)
249       CI.getFrontendOpts().ModuleFiles.push_back(KV.second.PCMFile);
250   }
251 }
252 
253 static std::string getModuleContextHash(const ModuleDeps &MD,
254                                         const CompilerInvocation &CI,
255                                         bool EagerLoadModules) {
256   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>,
257                     llvm::support::endianness::native>
258       HashBuilder;
259   SmallString<32> Scratch;
260 
261   // Hash the compiler version and serialization version to ensure the module
262   // will be readable.
263   HashBuilder.add(getClangFullRepositoryVersion());
264   HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
265 
266   // Hash the BuildInvocation without any input files.
267   SmallVector<const char *, 32> DummyArgs;
268   CI.generateCC1CommandLine(DummyArgs, [&](const Twine &Arg) {
269     Scratch.clear();
270     StringRef Str = Arg.toStringRef(Scratch);
271     HashBuilder.add(Str);
272     return "<unused>";
273   });
274 
275   // Hash the module dependencies. These paths may differ even if the invocation
276   // is identical if they depend on the contents of the files in the TU -- for
277   // example, case-insensitive paths to modulemap files. Usually such a case
278   // would indicate a missed optimization to canonicalize, but it may be
279   // difficult to canonicalize all cases when there is a VFS.
280   for (const auto &ID : MD.ClangModuleDeps) {
281     HashBuilder.add(ID.ModuleName);
282     HashBuilder.add(ID.ContextHash);
283   }
284 
285   HashBuilder.add(EagerLoadModules);
286 
287   llvm::BLAKE3Result<16> Hash = HashBuilder.final();
288   std::array<uint64_t, 2> Words;
289   static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words");
290   std::memcpy(Words.data(), Hash.data(), sizeof(Hash));
291   return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
292 }
293 
294 void ModuleDepCollector::associateWithContextHash(const CompilerInvocation &CI,
295                                                   ModuleDeps &Deps) {
296   Deps.ID.ContextHash = getModuleContextHash(Deps, CI, EagerLoadModules);
297   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
298   (void)Inserted;
299   assert(Inserted && "duplicate module mapping");
300 }
301 
302 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
303                                        FileChangeReason Reason,
304                                        SrcMgr::CharacteristicKind FileType,
305                                        FileID PrevFID) {
306   if (Reason != PPCallbacks::EnterFile)
307     return;
308 
309   // This has to be delayed as the context hash can change at the start of
310   // `CompilerInstance::ExecuteAction`.
311   if (MDC.ContextHash.empty()) {
312     MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
313     MDC.Consumer.handleContextHash(MDC.ContextHash);
314   }
315 
316   SourceManager &SM = MDC.ScanInstance.getSourceManager();
317 
318   // Dependency generation really does want to go all the way to the
319   // file entry for a source location to find out what is depended on.
320   // We do not want #line markers to affect dependency generation!
321   if (std::optional<StringRef> Filename =
322           SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc))))
323     MDC.addFileDep(llvm::sys::path::remove_leading_dotslash(*Filename));
324 }
325 
326 void ModuleDepCollectorPP::InclusionDirective(
327     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
328     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
329     StringRef SearchPath, StringRef RelativePath, const Module *Imported,
330     SrcMgr::CharacteristicKind FileType) {
331   if (!File && !Imported) {
332     // This is a non-modular include that HeaderSearch failed to find. Add it
333     // here as `FileChanged` will never see it.
334     MDC.addFileDep(FileName);
335   }
336   handleImport(Imported);
337 }
338 
339 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
340                                         ModuleIdPath Path,
341                                         const Module *Imported) {
342   if (MDC.ScanInstance.getPreprocessor().isInImportingCXXNamedModules()) {
343     P1689ModuleInfo RequiredModule;
344     RequiredModule.ModuleName = Path[0].first->getName().str();
345     RequiredModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule;
346     MDC.RequiredStdCXXModules.push_back(RequiredModule);
347     return;
348   }
349 
350   handleImport(Imported);
351 }
352 
353 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
354   if (!Imported)
355     return;
356 
357   const Module *TopLevelModule = Imported->getTopLevelModule();
358 
359   if (MDC.isPrebuiltModule(TopLevelModule))
360     MDC.DirectPrebuiltModularDeps.insert(
361         {TopLevelModule, PrebuiltModuleDep{TopLevelModule}});
362   else
363     DirectModularDeps.insert(TopLevelModule);
364 }
365 
366 void ModuleDepCollectorPP::EndOfMainFile() {
367   FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
368   MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
369                                  .getFileEntryForID(MainFileID)
370                                  ->getName());
371 
372   auto &PP = MDC.ScanInstance.getPreprocessor();
373   if (PP.isInNamedModule()) {
374     P1689ModuleInfo ProvidedModule;
375     ProvidedModule.ModuleName = PP.getNamedModuleName();
376     ProvidedModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule;
377     ProvidedModule.IsStdCXXModuleInterface = PP.isInNamedInterfaceUnit();
378     // Don't put implementation (non partition) unit as Provide.
379     // Put the module as required instead. Since the implementation
380     // unit will import the primary module implicitly.
381     if (PP.isInImplementationUnit())
382       MDC.RequiredStdCXXModules.push_back(ProvidedModule);
383     else
384       MDC.ProvidedStdCXXModule = ProvidedModule;
385   }
386 
387   if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
388     MDC.addFileDep(MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
389 
390   for (const Module *M :
391        MDC.ScanInstance.getPreprocessor().getAffectingClangModules())
392     if (!MDC.isPrebuiltModule(M))
393       DirectModularDeps.insert(M);
394 
395   for (const Module *M : DirectModularDeps)
396     handleTopLevelModule(M);
397 
398   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
399 
400   if (MDC.IsStdModuleP1689Format)
401     MDC.Consumer.handleProvidedAndRequiredStdCXXModules(
402         MDC.ProvidedStdCXXModule, MDC.RequiredStdCXXModules);
403 
404   for (auto &&I : MDC.ModularDeps)
405     MDC.Consumer.handleModuleDependency(*I.second);
406 
407   for (auto &&I : MDC.FileDeps)
408     MDC.Consumer.handleFileDependency(I);
409 
410   for (auto &&I : MDC.DirectPrebuiltModularDeps)
411     MDC.Consumer.handlePrebuiltModuleDependency(I.second);
412 }
413 
414 std::optional<ModuleID>
415 ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
416   assert(M == M->getTopLevelModule() && "Expected top level module!");
417 
418   // A top-level module might not be actually imported as a module when
419   // -fmodule-name is used to compile a translation unit that imports this
420   // module. In that case it can be skipped. The appropriate header
421   // dependencies will still be reported as expected.
422   if (!M->getASTFile())
423     return {};
424 
425   // If this module has been handled already, just return its ID.
426   auto ModI = MDC.ModularDeps.insert({M, nullptr});
427   if (!ModI.second)
428     return ModI.first->second->ID;
429 
430   ModI.first->second = std::make_unique<ModuleDeps>();
431   ModuleDeps &MD = *ModI.first->second;
432 
433   MD.ID.ModuleName = M->getFullModuleName();
434   MD.ImportedByMainFile = DirectModularDeps.contains(M);
435   MD.IsSystem = M->IsSystem;
436 
437   ModuleMap &ModMapInfo =
438       MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();
439 
440   OptionalFileEntryRef ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M);
441 
442   if (ModuleMap) {
443     SmallString<128> Path = ModuleMap->getNameAsRequested();
444     ModMapInfo.canonicalizeModuleMapPath(Path);
445     MD.ClangModuleMapFile = std::string(Path);
446   }
447 
448   serialization::ModuleFile *MF =
449       MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
450           M->getASTFile());
451   MDC.ScanInstance.getASTReader()->visitInputFiles(
452       *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
453         // __inferred_module.map is the result of the way in which an implicit
454         // module build handles inferred modules. It adds an overlay VFS with
455         // this file in the proper directory and relies on the rest of Clang to
456         // handle it like normal. With explicitly built modules we don't need
457         // to play VFS tricks, so replace it with the correct module map.
458         if (IF.getFile()->getName().endswith("__inferred_module.map")) {
459           MDC.addFileDep(MD, ModuleMap->getName());
460           return;
461         }
462         MDC.addFileDep(MD, IF.getFile()->getName());
463       });
464 
465   llvm::DenseSet<const Module *> SeenDeps;
466   addAllSubmodulePrebuiltDeps(M, MD, SeenDeps);
467   addAllSubmoduleDeps(M, MD, SeenDeps);
468   addAllAffectingClangModules(M, MD, SeenDeps);
469 
470   MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
471       *MF, [&](FileEntryRef FE) {
472         if (FE.getNameAsRequested().endswith("__inferred_module.map"))
473           return;
474         MD.ModuleMapFileDeps.emplace_back(FE.getNameAsRequested());
475       });
476 
477   CompilerInvocation CI = MDC.makeInvocationForModuleBuildWithoutOutputs(
478       MD, [&](CompilerInvocation &BuildInvocation) {
479         if (MDC.OptimizeArgs)
480           optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
481                                    *MDC.ScanInstance.getASTReader(), *MF);
482       });
483 
484   MDC.associateWithContextHash(CI, MD);
485 
486   // Finish the compiler invocation. Requires dependencies and the context hash.
487   MDC.addOutputPaths(CI, MD);
488 
489   MD.BuildArguments = CI.getCC1CommandLine();
490 
491   return MD.ID;
492 }
493 
494 static void forEachSubmoduleSorted(const Module *M,
495                                    llvm::function_ref<void(const Module *)> F) {
496   // Submodule order depends on order of header includes for inferred submodules
497   // we don't care about the exact order, so sort so that it's consistent across
498   // TUs to improve sharing.
499   SmallVector<const Module *> Submodules(M->submodule_begin(),
500                                          M->submodule_end());
501   llvm::stable_sort(Submodules, [](const Module *A, const Module *B) {
502     return A->Name < B->Name;
503   });
504   for (const Module *SubM : Submodules)
505     F(SubM);
506 }
507 
508 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
509     const Module *M, ModuleDeps &MD,
510     llvm::DenseSet<const Module *> &SeenSubmodules) {
511   addModulePrebuiltDeps(M, MD, SeenSubmodules);
512 
513   forEachSubmoduleSorted(M, [&](const Module *SubM) {
514     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
515   });
516 }
517 
518 void ModuleDepCollectorPP::addModulePrebuiltDeps(
519     const Module *M, ModuleDeps &MD,
520     llvm::DenseSet<const Module *> &SeenSubmodules) {
521   for (const Module *Import : M->Imports)
522     if (Import->getTopLevelModule() != M->getTopLevelModule())
523       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
524         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
525           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
526 }
527 
528 void ModuleDepCollectorPP::addAllSubmoduleDeps(
529     const Module *M, ModuleDeps &MD,
530     llvm::DenseSet<const Module *> &AddedModules) {
531   addModuleDep(M, MD, AddedModules);
532 
533   forEachSubmoduleSorted(M, [&](const Module *SubM) {
534     addAllSubmoduleDeps(SubM, MD, AddedModules);
535   });
536 }
537 
538 void ModuleDepCollectorPP::addModuleDep(
539     const Module *M, ModuleDeps &MD,
540     llvm::DenseSet<const Module *> &AddedModules) {
541   for (const Module *Import : M->Imports) {
542     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
543         !MDC.isPrebuiltModule(Import)) {
544       if (auto ImportID = handleTopLevelModule(Import->getTopLevelModule()))
545         if (AddedModules.insert(Import->getTopLevelModule()).second)
546           MD.ClangModuleDeps.push_back(*ImportID);
547     }
548   }
549 }
550 
551 void ModuleDepCollectorPP::addAllAffectingClangModules(
552     const Module *M, ModuleDeps &MD,
553     llvm::DenseSet<const Module *> &AddedModules) {
554   addAffectingClangModule(M, MD, AddedModules);
555 
556   for (const Module *SubM : M->submodules())
557     addAllAffectingClangModules(SubM, MD, AddedModules);
558 }
559 
560 void ModuleDepCollectorPP::addAffectingClangModule(
561     const Module *M, ModuleDeps &MD,
562     llvm::DenseSet<const Module *> &AddedModules) {
563   for (const Module *Affecting : M->AffectingClangModules) {
564     assert(Affecting == Affecting->getTopLevelModule() &&
565            "Not quite import not top-level module");
566     if (Affecting != M->getTopLevelModule() &&
567         !MDC.isPrebuiltModule(Affecting)) {
568       if (auto ImportID = handleTopLevelModule(Affecting))
569         if (AddedModules.insert(Affecting).second)
570           MD.ClangModuleDeps.push_back(*ImportID);
571     }
572   }
573 }
574 
575 ModuleDepCollector::ModuleDepCollector(
576     std::unique_ptr<DependencyOutputOptions> Opts,
577     CompilerInstance &ScanInstance, DependencyConsumer &C,
578     CompilerInvocation OriginalCI, bool OptimizeArgs, bool EagerLoadModules,
579     bool IsStdModuleP1689Format)
580     : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)),
581       OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs),
582       EagerLoadModules(EagerLoadModules),
583       IsStdModuleP1689Format(IsStdModuleP1689Format) {}
584 
585 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
586   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
587 }
588 
589 void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
590 
591 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
592   std::string Name(M->getTopLevelModuleName());
593   const auto &PrebuiltModuleFiles =
594       ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
595   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
596   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
597     return false;
598   assert("Prebuilt module came from the expected AST file" &&
599          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
600   return true;
601 }
602 
603 static StringRef makeAbsoluteAndPreferred(CompilerInstance &CI, StringRef Path,
604                                           SmallVectorImpl<char> &Storage) {
605   if (llvm::sys::path::is_absolute(Path) &&
606       !llvm::sys::path::is_style_windows(llvm::sys::path::Style::native))
607     return Path;
608   Storage.assign(Path.begin(), Path.end());
609   CI.getFileManager().makeAbsolutePath(Storage);
610   llvm::sys::path::make_preferred(Storage);
611   return StringRef(Storage.data(), Storage.size());
612 }
613 
614 void ModuleDepCollector::addFileDep(StringRef Path) {
615   llvm::SmallString<256> Storage;
616   Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
617   FileDeps.push_back(std::string(Path));
618 }
619 
620 void ModuleDepCollector::addFileDep(ModuleDeps &MD, StringRef Path) {
621   llvm::SmallString<256> Storage;
622   Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
623   MD.FileDeps.insert(Path);
624 }
625