1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11 
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Frontend/CompilerInvocation.h"
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Lex/HeaderSearch.h"
17 #include "clang/Lex/PPCallbacks.h"
18 #include "clang/Serialization/ASTReader.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <optional>
23 #include <string>
24 #include <unordered_map>
25 
26 namespace clang {
27 namespace tooling {
28 namespace dependencies {
29 
30 class DependencyConsumer;
31 
/// Modular dependency that has already been built prior to the dependency scan.
///
/// All three fields are captured from the \c Module handed to the constructor.
struct PrebuiltModuleDep {
  /// Name of the top-level module, as reported by
  /// \c Module::getTopLevelModuleName().
  std::string ModuleName;
  /// Name of the module's AST file, taken from \c Module::getASTFile().
  std::string PCMFile;
  /// Copy of \c Module::PresumedModuleMapFile.
  std::string ModuleMapFile;

  /// Populates every field from \p M.
  ///
  /// NOTE(review): \p M and the result of \c M->getASTFile() are dereferenced
  /// without a check, so callers presumably pass only modules that have an AST
  /// file -- confirm against the call sites.
  explicit PrebuiltModuleDep(const Module *M)
      : ModuleName(M->getTopLevelModuleName()),
        PCMFile(M->getASTFile()->getName()),
        ModuleMapFile(M->PresumedModuleMapFile) {}
};
43 
/// Identity of one specific module: its name paired with its context hash.
struct ModuleID {
  /// Module name. For C++20 this may contain `:` (module partitions) or be a
  /// header-name (header units).
  std::string ModuleName;

  /// Hash standing for the compiler options that affect the resulting
  /// command-line invocation.
  ///
  /// If two modules share both name and \c ContextHash yet are built with
  /// different invocations, build results could become non-deterministic.
  ///
  /// For the purpose of a build, modules that share a name but differ in
  /// \c ContextHash should be treated as separate modules.
  std::string ContextHash;

  /// Two IDs are equal only when both the name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    if (ModuleName != Other.ModuleName)
      return false;
    return ContextHash == Other.ContextHash;
  }
};
64 
/// P1689ModuleInfo - The information about a standard C++20 module that the
/// P1689 format needs.
struct P1689ModuleInfo {
  /// Module name; partitions are spelled with a `:`.
  std::string ModuleName;

  /// Source path of the module. Optional.
  std::string SourcePath;

  /// True when this module is a standard C++ interface unit.
  bool IsStdCXXModuleInterface{true};

  /// Kind of module described by this record.
  enum class ModuleType {
    NamedCXXModule
    // To be supported
    // AngleHeaderUnit,
    // QuoteHeaderUnit
  };
  /// Currently always \c NamedCXXModule, the only enumerator so far.
  ModuleType Type{ModuleType::NamedCXXModule};
};
85 
/// Names one output of a module compilation (for example, the path of the
/// module file).
enum class ModuleOutputKind {
  /// Required: the module file (.pcm).
  ModuleFile,
  /// Path of the dependency file (.d), when there is one.
  DependencyFile,
  /// Null-separated list of names used as targets in the dependency file, when
  /// there is one. As in the driver, this defaults to the value of
  /// \c ModuleFile.
  DependencyTargets,
  /// Path of the serialized diagnostic file (.dia), when there is one.
  DiagnosticSerializationFile,
};
98 
99 struct ModuleDeps {
100   /// The identifier of the module.
101   ModuleID ID;
102 
103   /// Whether this is a "system" module.
104   bool IsSystem;
105 
106   /// The path to the modulemap file which defines this module.
107   ///
108   /// This can be used to explicitly build this module. This file will
109   /// additionally appear in \c FileDeps as a dependency.
110   std::string ClangModuleMapFile;
111 
112   /// A collection of absolute paths to files that this module directly depends
113   /// on, not including transitive dependencies.
114   llvm::StringSet<> FileDeps;
115 
116   /// A collection of absolute paths to module map files that this module needs
117   /// to know about. The ordering is significant.
118   std::vector<std::string> ModuleMapFileDeps;
119 
120   /// A collection of prebuilt modular dependencies this module directly depends
121   /// on, not including transitive dependencies.
122   std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
123 
124   /// A list of module identifiers this module directly depends on, not
125   /// including transitive dependencies.
126   ///
127   /// This may include modules with a different context hash when it can be
128   /// determined that the differences are benign for this compilation.
129   std::vector<ModuleID> ClangModuleDeps;
130 
131   // Used to track which modules that were discovered were directly imported by
132   // the primary TU.
133   bool ImportedByMainFile = false;
134 
135   /// Compiler invocation that can be used to build this module. Does not
136   /// include argv[0].
137   std::vector<std::string> BuildArguments;
138 };
139 
140 class ModuleDepCollector;
141 
142 /// Callback that records textual includes and direct modular includes/imports
143 /// during preprocessing. At the end of the main file, it also collects
144 /// transitive modular dependencies and passes everything to the
145 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
146 class ModuleDepCollectorPP final : public PPCallbacks {
147 public:
148   ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
149 
150   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
151                    SrcMgr::CharacteristicKind FileType,
152                    FileID PrevFID) override;
153   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
154                           StringRef FileName, bool IsAngled,
155                           CharSourceRange FilenameRange,
156                           OptionalFileEntryRef File, StringRef SearchPath,
157                           StringRef RelativePath, const Module *Imported,
158                           SrcMgr::CharacteristicKind FileType) override;
159   void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
160                     const Module *Imported) override;
161 
162   void EndOfMainFile() override;
163 
164 private:
165   /// The parent dependency collector.
166   ModuleDepCollector &MDC;
167   /// Working set of direct modular dependencies.
168   llvm::SetVector<const Module *> DirectModularDeps;
169 
170   void handleImport(const Module *Imported);
171 
172   /// Adds direct modular dependencies that have already been built to the
173   /// ModuleDeps instance.
174   void
175   addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
176                               llvm::DenseSet<const Module *> &SeenSubmodules);
177   void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
178                              llvm::DenseSet<const Module *> &SeenSubmodules);
179 
180   /// Traverses the previously collected direct modular dependencies to discover
181   /// transitive modular dependencies and fills the parent \c ModuleDepCollector
182   /// with both.
183   /// Returns the ID or nothing if the dependency is spurious and is ignored.
184   std::optional<ModuleID> handleTopLevelModule(const Module *M);
185   void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
186                            llvm::DenseSet<const Module *> &AddedModules);
187   void addModuleDep(const Module *M, ModuleDeps &MD,
188                     llvm::DenseSet<const Module *> &AddedModules);
189 
190   /// Traverses the affecting modules and updates \c MD with references to the
191   /// parent \c ModuleDepCollector info.
192   void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
193                               llvm::DenseSet<const Module *> &AddedModules);
194   void addAffectingClangModule(const Module *M, ModuleDeps &MD,
195                           llvm::DenseSet<const Module *> &AddedModules);
196 };
197 
/// Collects modular and non-modular dependencies of the main file by attaching
/// \c ModuleDepCollectorPP to the preprocessor.
///
/// \c ModuleDepCollectorPP is a friend and therefore works directly on the
/// private state declared below.
class ModuleDepCollector final : public DependencyCollector {
public:
  /// Creates a collector for one scan.
  ///
  /// Most parameters share a name with a private member declared below
  /// (\p C and \p OriginalCI line up by type with \c Consumer and
  /// \c OriginalInvocation).
  /// NOTE(review): the definition is not in this header; presumably each
  /// argument is simply stored in its member -- confirm in the .cpp file.
  ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
                     CompilerInstance &ScanInstance, DependencyConsumer &C,
                     CompilerInvocation OriginalCI, bool OptimizeArgs,
                     bool EagerLoadModules, bool IsStdModuleP1689Format);

  // DependencyCollector overrides, defined out of line. Going by the class
  // description, attachToPreprocessor is where \c ModuleDepCollectorPP gets
  // hooked up to \p PP.
  void attachToPreprocessor(Preprocessor &PP) override;
  void attachToASTReader(ASTReader &R) override;

  /// Apply any changes implied by the discovered dependencies to the given
  /// invocation, (e.g. disable implicit modules, add explicit module paths).
  void applyDiscoveredDependencies(CompilerInvocation &CI);

private:
  // The preprocessor callback needs access to the private state below.
  friend ModuleDepCollectorPP;

  /// The compiler instance for scanning the current translation unit.
  CompilerInstance &ScanInstance;
  /// The consumer of collected dependency information.
  DependencyConsumer &Consumer;
  /// Path to the main source file.
  std::string MainFile;
  /// Hash identifying the compilation conditions of the current TU.
  std::string ContextHash;
  /// Non-modular file dependencies. This includes the main source file and
  /// textually included header files.
  std::vector<std::string> FileDeps;
  /// Direct and transitive modular dependencies of the main source file.
  llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
  /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
  /// a preprocessor. Storage owned by \c ModularDeps.
  llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
  /// Direct modular dependencies that have already been built.
  llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
  /// Options that control the dependency output generation.
  std::unique_ptr<DependencyOutputOptions> Opts;
  /// The original Clang invocation passed to dependency scanner.
  CompilerInvocation OriginalInvocation;
  /// Whether to optimize the modules' command-line arguments.
  bool OptimizeArgs;
  /// Whether to set up command-lines to load PCM files eagerly.
  bool EagerLoadModules;
  /// If we're generating dependency output in P1689 format
  /// for standard C++ modules.
  bool IsStdModuleP1689Format;

  /// P1689 information for a standard C++ module, if one was recorded.
  /// NOTE(review): judging by the name, this is the module the scanned TU
  /// itself provides -- confirm against the .cpp file.
  std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
  /// P1689 information for a list of standard C++ modules.
  /// NOTE(review): judging by the name, these are the modules the scanned TU
  /// requires -- confirm against the .cpp file.
  std::vector<P1689ModuleInfo> RequiredStdCXXModules;

  /// Checks whether the module is known as being prebuilt.
  bool isPrebuiltModule(const Module *M);

  /// Adds \p Path to \c FileDeps, making it absolute if necessary.
  void addFileDep(StringRef Path);
  /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
  void addFileDep(ModuleDeps &MD, StringRef Path);

  /// Constructs a CompilerInvocation that can be used to build the given
  /// module, excluding paths to discovered modular dependencies that are yet to
  /// be built.
  ///
  /// \param Deps the module to create the invocation for.
  /// \param Optimize callback that receives a mutable \c CompilerInvocation.
  ///        NOTE(review): presumably run on the new invocation before it is
  ///        returned -- confirm in the .cpp file.
  CompilerInvocation makeInvocationForModuleBuildWithoutOutputs(
      const ModuleDeps &Deps,
      llvm::function_ref<void(CompilerInvocation &)> Optimize) const;

  /// Collect module map files for given modules.
  llvm::DenseSet<const FileEntry *>
  collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;

  /// Add module map files to the invocation, if needed.
  void addModuleMapFiles(CompilerInvocation &CI,
                         ArrayRef<ModuleID> ClangModuleDeps) const;
  /// Add module files (pcm) to the invocation, if needed.
  void addModuleFiles(CompilerInvocation &CI,
                      ArrayRef<ModuleID> ClangModuleDeps) const;

  /// Add paths that require looking up outputs to the given dependencies.
  void addOutputPaths(CompilerInvocation &CI, ModuleDeps &Deps);

  /// Compute the context hash for \p Deps, and create the mapping
  /// \c ModuleDepsByID[Deps.ID] = &Deps.
  void associateWithContextHash(const CompilerInvocation &CI, ModuleDeps &Deps);
};
283 
284 } // end namespace dependencies
285 } // end namespace tooling
286 } // end namespace clang
287 
288 namespace llvm {
289 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
290   using ModuleID = clang::tooling::dependencies::ModuleID;
291   static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
292   static inline ModuleID getTombstoneKey() {
293     return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
294   }
295   static unsigned getHashValue(const ModuleID &ID) {
296     return hash_combine(ID.ModuleName, ID.ContextHash);
297   }
298   static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
299     return LHS == RHS;
300   }
301 };
302 } // namespace llvm
303 
304 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
305