1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11 
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Frontend/CompilerInvocation.h"
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Lex/HeaderSearch.h"
17 #include "clang/Lex/PPCallbacks.h"
18 #include "clang/Serialization/ASTReader.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <optional>
23 #include <string>
24 #include <unordered_map>
25 
26 namespace clang {
27 namespace tooling {
28 namespace dependencies {
29 
30 class DependencyActionController;
31 class DependencyConsumer;
32 
33 /// Modular dependency that has already been built prior to the dependency scan.
34 struct PrebuiltModuleDep {
35   std::string ModuleName;
36   std::string PCMFile;
37   std::string ModuleMapFile;
38 
39   explicit PrebuiltModuleDep(const Module *M)
40       : ModuleName(M->getTopLevelModuleName()),
41         PCMFile(M->getASTFile()->getName()),
42         ModuleMapFile(M->PresumedModuleMapFile) {}
43 };
44 
/// Identifies one specific module: a module name paired with a context hash.
struct ModuleID {
  /// Name of the module. For C++20 it can contain `:` (module partitions) or
  /// be a header-name (header units).
  std::string ModuleName;

  /// Hash standing for the compiler options that influence the resulting
  /// command-line invocation.
  ///
  /// Two modules sharing both the name and the \c ContextHash while being
  /// built from differing invocations could cause non-deterministic build
  /// results.
  ///
  /// Modules sharing only the name, with a different \c ContextHash, should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// Two IDs are equal when both the name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
  }

  /// Lexicographic ordering: by module name first, with the context hash
  /// breaking ties.
  bool operator<(const ModuleID &Other) const {
    if (const int NameOrder = ModuleName.compare(Other.ModuleName))
      return NameOrder < 0;
    return ContextHash < Other.ContextHash;
  }
};
71 
/// P1689ModuleInfo - Carries the information about a standard C++20 module
/// that is needed for the P1689 format.
struct P1689ModuleInfo {
  /// The kinds of module this record can describe.
  enum class ModuleType {
    NamedCXXModule
    // To be supported
    // AngleHeaderUnit,
    // QuoteHeaderUnit
  };

  /// Module name; partitions are spelled with a `:`.
  std::string ModuleName;

  /// Optional. Source path of the module.
  std::string SourcePath;

  /// Whether this module is a standard C++ interface unit.
  bool IsStdCXXModuleInterface = true;

  /// Kind of this module; only named C++ modules exist for now.
  ModuleType Type = ModuleType::NamedCXXModule;
};
92 
/// Names one of the outputs of a module compilation, for instance the path of
/// the module file.
enum class ModuleOutputKind {
  /// Path of the module file (.pcm). This output is required.
  ModuleFile,
  /// Path of the dependency file (.d), when there is one.
  DependencyFile,
  /// Null-separated list of the names used as targets in the dependency file,
  /// when there is one. Falls back to the value of \c ModuleFile, as in the
  /// driver.
  DependencyTargets,
  /// Path of the serialized diagnostic file (.dia), when there is one.
  DiagnosticSerializationFile,
};
105 
106 struct ModuleDeps {
107   /// The identifier of the module.
108   ModuleID ID;
109 
110   /// Whether this is a "system" module.
111   bool IsSystem;
112 
113   /// The path to the modulemap file which defines this module.
114   ///
115   /// This can be used to explicitly build this module. This file will
116   /// additionally appear in \c FileDeps as a dependency.
117   std::string ClangModuleMapFile;
118 
119   /// A collection of absolute paths to files that this module directly depends
120   /// on, not including transitive dependencies.
121   llvm::StringSet<> FileDeps;
122 
123   /// A collection of absolute paths to module map files that this module needs
124   /// to know about. The ordering is significant.
125   std::vector<std::string> ModuleMapFileDeps;
126 
127   /// A collection of prebuilt modular dependencies this module directly depends
128   /// on, not including transitive dependencies.
129   std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
130 
131   /// A list of module identifiers this module directly depends on, not
132   /// including transitive dependencies.
133   ///
134   /// This may include modules with a different context hash when it can be
135   /// determined that the differences are benign for this compilation.
136   std::vector<ModuleID> ClangModuleDeps;
137 
138   // Used to track which modules that were discovered were directly imported by
139   // the primary TU.
140   bool ImportedByMainFile = false;
141 
142   /// Compiler invocation that can be used to build this module. Does not
143   /// include argv[0].
144   std::vector<std::string> BuildArguments;
145 };
146 
147 class ModuleDepCollector;
148 
149 /// Callback that records textual includes and direct modular includes/imports
150 /// during preprocessing. At the end of the main file, it also collects
151 /// transitive modular dependencies and passes everything to the
152 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
153 class ModuleDepCollectorPP final : public PPCallbacks {
154 public:
155   ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
156 
157   void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
158                         SrcMgr::CharacteristicKind FileType, FileID PrevFID,
159                         SourceLocation Loc) override;
160   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
161                           StringRef FileName, bool IsAngled,
162                           CharSourceRange FilenameRange,
163                           OptionalFileEntryRef File, StringRef SearchPath,
164                           StringRef RelativePath, const Module *Imported,
165                           SrcMgr::CharacteristicKind FileType) override;
166   void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
167                     const Module *Imported) override;
168 
169   void EndOfMainFile() override;
170 
171 private:
172   /// The parent dependency collector.
173   ModuleDepCollector &MDC;
174   /// Working set of direct modular dependencies.
175   llvm::SetVector<const Module *> DirectModularDeps;
176 
177   void handleImport(const Module *Imported);
178 
179   /// Adds direct modular dependencies that have already been built to the
180   /// ModuleDeps instance.
181   void
182   addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
183                               llvm::DenseSet<const Module *> &SeenSubmodules);
184   void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
185                              llvm::DenseSet<const Module *> &SeenSubmodules);
186 
187   /// Traverses the previously collected direct modular dependencies to discover
188   /// transitive modular dependencies and fills the parent \c ModuleDepCollector
189   /// with both.
190   /// Returns the ID or nothing if the dependency is spurious and is ignored.
191   std::optional<ModuleID> handleTopLevelModule(const Module *M);
192   void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
193                            llvm::DenseSet<const Module *> &AddedModules);
194   void addModuleDep(const Module *M, ModuleDeps &MD,
195                     llvm::DenseSet<const Module *> &AddedModules);
196 
197   /// Traverses the affecting modules and updates \c MD with references to the
198   /// parent \c ModuleDepCollector info.
199   void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
200                               llvm::DenseSet<const Module *> &AddedModules);
201   void addAffectingClangModule(const Module *M, ModuleDeps &MD,
202                           llvm::DenseSet<const Module *> &AddedModules);
203 };
204 
205 /// Collects modular and non-modular dependencies of the main file by attaching
206 /// \c ModuleDepCollectorPP to the preprocessor.
207 class ModuleDepCollector final : public DependencyCollector {
208 public:
209   ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
210                      CompilerInstance &ScanInstance, DependencyConsumer &C,
211                      DependencyActionController &Controller,
212                      CompilerInvocation OriginalCI, bool OptimizeArgs,
213                      bool EagerLoadModules, bool IsStdModuleP1689Format);
214 
215   void attachToPreprocessor(Preprocessor &PP) override;
216   void attachToASTReader(ASTReader &R) override;
217 
218   /// Apply any changes implied by the discovered dependencies to the given
219   /// invocation, (e.g. disable implicit modules, add explicit module paths).
220   void applyDiscoveredDependencies(CompilerInvocation &CI);
221 
222 private:
223   friend ModuleDepCollectorPP;
224 
225   /// The compiler instance for scanning the current translation unit.
226   CompilerInstance &ScanInstance;
227   /// The consumer of collected dependency information.
228   DependencyConsumer &Consumer;
229   /// Callbacks for computing dependency information.
230   DependencyActionController &Controller;
231   /// Path to the main source file.
232   std::string MainFile;
233   /// Hash identifying the compilation conditions of the current TU.
234   std::string ContextHash;
235   /// Non-modular file dependencies. This includes the main source file and
236   /// textually included header files.
237   std::vector<std::string> FileDeps;
238   /// Direct and transitive modular dependencies of the main source file.
239   llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
240   /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
241   /// a preprocessor. Storage owned by \c ModularDeps.
242   llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
243   /// Direct modular dependencies that have already been built.
244   llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
245   /// Options that control the dependency output generation.
246   std::unique_ptr<DependencyOutputOptions> Opts;
247   /// The original Clang invocation passed to dependency scanner.
248   CompilerInvocation OriginalInvocation;
249   /// Whether to optimize the modules' command-line arguments.
250   bool OptimizeArgs;
251   /// Whether to set up command-lines to load PCM files eagerly.
252   bool EagerLoadModules;
253   /// If we're generating dependency output in P1689 format
254   /// for standard C++ modules.
255   bool IsStdModuleP1689Format;
256 
257   std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
258   std::vector<P1689ModuleInfo> RequiredStdCXXModules;
259 
260   /// Checks whether the module is known as being prebuilt.
261   bool isPrebuiltModule(const Module *M);
262 
263   /// Adds \p Path to \c FileDeps, making it absolute if necessary.
264   void addFileDep(StringRef Path);
265   /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
266   void addFileDep(ModuleDeps &MD, StringRef Path);
267 
268   /// Constructs a CompilerInvocation that can be used to build the given
269   /// module, excluding paths to discovered modular dependencies that are yet to
270   /// be built.
271   CompilerInvocation makeInvocationForModuleBuildWithoutOutputs(
272       const ModuleDeps &Deps,
273       llvm::function_ref<void(CompilerInvocation &)> Optimize) const;
274 
275   /// Collect module map files for given modules.
276   llvm::DenseSet<const FileEntry *>
277   collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
278 
279   /// Add module map files to the invocation, if needed.
280   void addModuleMapFiles(CompilerInvocation &CI,
281                          ArrayRef<ModuleID> ClangModuleDeps) const;
282   /// Add module files (pcm) to the invocation, if needed.
283   void addModuleFiles(CompilerInvocation &CI,
284                       ArrayRef<ModuleID> ClangModuleDeps) const;
285 
286   /// Add paths that require looking up outputs to the given dependencies.
287   void addOutputPaths(CompilerInvocation &CI, ModuleDeps &Deps);
288 
289   /// Compute the context hash for \p Deps, and create the mapping
290   /// \c ModuleDepsByID[Deps.ID] = &Deps.
291   void associateWithContextHash(const CompilerInvocation &CI, ModuleDeps &Deps);
292 };
293 
294 } // end namespace dependencies
295 } // end namespace tooling
296 } // end namespace clang
297 
298 namespace llvm {
299 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
300   using ModuleID = clang::tooling::dependencies::ModuleID;
301   static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
302   static inline ModuleID getTombstoneKey() {
303     return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
304   }
305   static unsigned getHashValue(const ModuleID &ID) {
306     return hash_combine(ID.ModuleName, ID.ContextHash);
307   }
308   static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
309     return LHS == RHS;
310   }
311 };
312 } // namespace llvm
313 
314 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
315