//===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_MODULE_DEP_COLLECTOR_H
11 #define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_MODULE_DEP_COLLECTOR_H
12 
#include "clang/Basic/LLVM.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Serialization/ASTReader.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
25 
26 namespace clang {
27 namespace tooling {
28 namespace dependencies {
29 
// Forward declaration: this header only refers to DependencyConsumer by
// reference, so its full definition is not required here.
class DependencyConsumer;
31 
32 /// Modular dependency that has already been built prior to the dependency scan.
33 struct PrebuiltModuleDep {
34   std::string ModuleName;
35   std::string PCMFile;
36   std::string ModuleMapFile;
37 
PrebuiltModuleDepPrebuiltModuleDep38   explicit PrebuiltModuleDep(const Module *M)
39       : ModuleName(M->getTopLevelModuleName()),
40         PCMFile(M->getASTFile()->getName()),
41         ModuleMapFile(M->PresumedModuleMapFile) {}
42 };
43 
/// This is used to identify a specific module.
struct ModuleID {
  /// The name of the module. This may include `:` for C++20 module partitions,
  /// or a header-name for C++20 header units.
  std::string ModuleName;

  /// The context hash of a module represents the set of compiler options that
  /// may make one version of a module incompatible with another. This includes
  /// things like language mode, predefined macros, header search paths, etc...
  ///
  /// Modules with the same name but a different \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// Two identifiers compare equal only when both the module name and the
  /// context hash match.
  bool operator==(const ModuleID &Other) const {
    return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
  }
};
62 
63 struct ModuleIDHasher {
operatorModuleIDHasher64   std::size_t operator()(const ModuleID &MID) const {
65     return llvm::hash_combine(MID.ModuleName, MID.ContextHash);
66   }
67 };
68 
69 struct ModuleDeps {
70   /// The identifier of the module.
71   ModuleID ID;
72 
73   /// Whether this is a "system" module.
74   bool IsSystem;
75 
76   /// The path to the modulemap file which defines this module.
77   ///
78   /// This can be used to explicitly build this module. This file will
79   /// additionally appear in \c FileDeps as a dependency.
80   std::string ClangModuleMapFile;
81 
82   /// The path to where an implicit build would put the PCM for this module.
83   std::string ImplicitModulePCMPath;
84 
85   /// A collection of absolute paths to files that this module directly depends
86   /// on, not including transitive dependencies.
87   llvm::StringSet<> FileDeps;
88 
89   /// A collection of prebuilt modular dependencies this module directly depends
90   /// on, not including transitive dependencies.
91   std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
92 
93   /// A list of module identifiers this module directly depends on, not
94   /// including transitive dependencies.
95   ///
96   /// This may include modules with a different context hash when it can be
97   /// determined that the differences are benign for this compilation.
98   std::vector<ModuleID> ClangModuleDeps;
99 
100   // Used to track which modules that were discovered were directly imported by
101   // the primary TU.
102   bool ImportedByMainFile = false;
103 
104   /// Compiler invocation that can be used to build this module (without paths).
105   CompilerInvocation Invocation;
106 
107   /// Gets the canonical command line suitable for passing to clang.
108   ///
109   /// \param LookupPCMPath This function is called to fill in "-fmodule-file="
110   ///                      arguments and the "-o" argument. It needs to return
111   ///                      a path for where the PCM for the given module is to
112   ///                      be located.
113   /// \param LookupModuleDeps This function is called to collect the full
114   ///                         transitive set of dependencies for this
115   ///                         compilation and fill in "-fmodule-map-file="
116   ///                         arguments.
117   std::vector<std::string> getCanonicalCommandLine(
118       std::function<StringRef(ModuleID)> LookupPCMPath,
119       std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps) const;
120 
121   /// Gets the canonical command line suitable for passing to clang, excluding
122   /// arguments containing modules-related paths: "-fmodule-file=", "-o",
123   /// "-fmodule-map-file=".
124   std::vector<std::string> getCanonicalCommandLineWithoutModulePaths() const;
125 };
126 
127 namespace detail {
128 /// Collect the paths of PCM and module map files for the modules in \c Modules
129 /// transitively.
130 void collectPCMAndModuleMapPaths(
131     llvm::ArrayRef<ModuleID> Modules,
132     std::function<StringRef(ModuleID)> LookupPCMPath,
133     std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps,
134     std::vector<std::string> &PCMPaths, std::vector<std::string> &ModMapPaths);
135 } // namespace detail
136 
// Forward declaration: ModuleDepCollectorPP stores a reference to its parent
// collector, whose definition appears further down in this header.
class ModuleDepCollector;
138 
139 /// Callback that records textual includes and direct modular includes/imports
140 /// during preprocessing. At the end of the main file, it also collects
141 /// transitive modular dependencies and passes everything to the
142 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
143 class ModuleDepCollectorPP final : public PPCallbacks {
144 public:
ModuleDepCollectorPP(CompilerInstance & I,ModuleDepCollector & MDC)145   ModuleDepCollectorPP(CompilerInstance &I, ModuleDepCollector &MDC)
146       : Instance(I), MDC(MDC) {}
147 
148   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
149                    SrcMgr::CharacteristicKind FileType,
150                    FileID PrevFID) override;
151   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
152                           StringRef FileName, bool IsAngled,
153                           CharSourceRange FilenameRange, const FileEntry *File,
154                           StringRef SearchPath, StringRef RelativePath,
155                           const Module *Imported,
156                           SrcMgr::CharacteristicKind FileType) override;
157   void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
158                     const Module *Imported) override;
159 
160   void EndOfMainFile() override;
161 
162 private:
163   /// The compiler instance for the current translation unit.
164   CompilerInstance &Instance;
165   /// The parent dependency collector.
166   ModuleDepCollector &MDC;
167   /// Working set of direct modular dependencies.
168   llvm::DenseSet<const Module *> DirectModularDeps;
169   /// Working set of direct modular dependencies that have already been built.
170   llvm::DenseSet<const Module *> DirectPrebuiltModularDeps;
171 
172   void handleImport(const Module *Imported);
173 
174   /// Adds direct modular dependencies that have already been built to the
175   /// ModuleDeps instance.
176   void addDirectPrebuiltModuleDeps(const Module *M, ModuleDeps &MD);
177 
178   /// Traverses the previously collected direct modular dependencies to discover
179   /// transitive modular dependencies and fills the parent \c ModuleDepCollector
180   /// with both.
181   ModuleID handleTopLevelModule(const Module *M);
182   void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
183                            llvm::DenseSet<const Module *> &AddedModules);
184   void addModuleDep(const Module *M, ModuleDeps &MD,
185                     llvm::DenseSet<const Module *> &AddedModules);
186 };
187 
188 /// Collects modular and non-modular dependencies of the main file by attaching
189 /// \c ModuleDepCollectorPP to the preprocessor.
190 class ModuleDepCollector final : public DependencyCollector {
191 public:
192   ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
193                      CompilerInstance &I, DependencyConsumer &C,
194                      CompilerInvocation &&OriginalCI);
195 
196   void attachToPreprocessor(Preprocessor &PP) override;
197   void attachToASTReader(ASTReader &R) override;
198 
199 private:
200   friend ModuleDepCollectorPP;
201 
202   /// The compiler instance for the current translation unit.
203   CompilerInstance &Instance;
204   /// The consumer of collected dependency information.
205   DependencyConsumer &Consumer;
206   /// Path to the main source file.
207   std::string MainFile;
208   /// Hash identifying the compilation conditions of the current TU.
209   std::string ContextHash;
210   /// Non-modular file dependencies. This includes the main source file and
211   /// textually included header files.
212   std::vector<std::string> FileDeps;
213   /// Direct and transitive modular dependencies of the main source file.
214   std::unordered_map<const Module *, ModuleDeps> ModularDeps;
215   /// Options that control the dependency output generation.
216   std::unique_ptr<DependencyOutputOptions> Opts;
217   /// The original Clang invocation passed to dependency scanner.
218   CompilerInvocation OriginalInvocation;
219 
220   /// Checks whether the module is known as being prebuilt.
221   bool isPrebuiltModule(const Module *M);
222 
223   /// Constructs a CompilerInvocation that can be used to build the given
224   /// module, excluding paths to discovered modular dependencies that are yet to
225   /// be built.
226   CompilerInvocation
227   makeInvocationForModuleBuildWithoutPaths(const ModuleDeps &Deps) const;
228 };
229 
230 } // end namespace dependencies
231 } // end namespace tooling
232 } // end namespace clang
233 
234 #endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_MODULE_DEP_COLLECTOR_H
235