1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11 
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Frontend/CompilerInvocation.h"
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Lex/HeaderSearch.h"
17 #include "clang/Lex/PPCallbacks.h"
18 #include "clang/Serialization/ASTReader.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <string>
23 #include <unordered_map>
24 
25 namespace clang {
26 namespace tooling {
27 namespace dependencies {
28 
29 class DependencyConsumer;
30 
31 /// Modular dependency that has already been built prior to the dependency scan.
32 struct PrebuiltModuleDep {
33   std::string ModuleName;
34   std::string PCMFile;
35   std::string ModuleMapFile;
36 
37   explicit PrebuiltModuleDep(const Module *M)
38       : ModuleName(M->getTopLevelModuleName()),
39         PCMFile(M->getASTFile()->getName()),
40         ModuleMapFile(M->PresumedModuleMapFile) {}
41 };
42 
/// Key that uniquely identifies one buildable variant of a module.
struct ModuleID {
  /// The module's name. For C++20 module partitions this may contain `:`, and
  /// for C++20 header units it may be a header-name.
  std::string ModuleName;

  /// Hash over the compiler options that can make two builds of the same
  /// module incompatible with each other (language mode, predefined macros,
  /// header search paths, and so on).
  ///
  /// Two modules that share a name but differ in \c ContextHash must be
  /// treated as distinct modules by the build.
  std::string ContextHash;

  /// Two IDs are equal only when both the name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    if (ModuleName != Other.ModuleName)
      return false;
    return ContextHash == Other.ContextHash;
  }
};
61 
62 struct ModuleIDHasher {
63   std::size_t operator()(const ModuleID &MID) const {
64     return llvm::hash_combine(MID.ModuleName, MID.ContextHash);
65   }
66 };
67 
/// Selects which output of a module compilation is being asked about, for
/// example the path of the module file itself.
enum class ModuleOutputKind {
  /// Path of the module file (.pcm). This output is required.
  ModuleFile,
  /// Path of the dependency file (.d). Optional.
  DependencyFile,
  /// Null-separated list of target names to write into the dependency file.
  /// Optional; when absent the value of \c ModuleFile is used, mirroring the
  /// driver.
  DependencyTargets,
  /// Path of the serialized diagnostics file (.dia). Optional.
  DiagnosticSerializationFile,
};
80 
81 struct ModuleDeps {
82   /// The identifier of the module.
83   ModuleID ID;
84 
85   /// Whether this is a "system" module.
86   bool IsSystem;
87 
88   /// The path to the modulemap file which defines this module.
89   ///
90   /// This can be used to explicitly build this module. This file will
91   /// additionally appear in \c FileDeps as a dependency.
92   std::string ClangModuleMapFile;
93 
94   /// The path to where an implicit build would put the PCM for this module.
95   std::string ImplicitModulePCMPath;
96 
97   /// A collection of absolute paths to files that this module directly depends
98   /// on, not including transitive dependencies.
99   llvm::StringSet<> FileDeps;
100 
101   /// A collection of absolute paths to module map files that this module needs
102   /// to know about.
103   std::vector<std::string> ModuleMapFileDeps;
104 
105   /// A collection of prebuilt modular dependencies this module directly depends
106   /// on, not including transitive dependencies.
107   std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
108 
109   /// A list of module identifiers this module directly depends on, not
110   /// including transitive dependencies.
111   ///
112   /// This may include modules with a different context hash when it can be
113   /// determined that the differences are benign for this compilation.
114   std::vector<ModuleID> ClangModuleDeps;
115 
116   // Used to track which modules that were discovered were directly imported by
117   // the primary TU.
118   bool ImportedByMainFile = false;
119 
120   /// Whether the TU had a dependency file. The path in \c BuildInvocation is
121   /// cleared to avoid leaking the specific path from the TU into the module.
122   bool HadDependencyFile = false;
123 
124   /// Whether the TU had serialized diagnostics. The path in \c BuildInvocation
125   /// is cleared to avoid leaking the specific path from the TU into the module.
126   bool HadSerializedDiagnostics = false;
127 
128   /// Compiler invocation that can be used to build this module (without paths).
129   CompilerInvocation BuildInvocation;
130 
131   /// Gets the canonical command line suitable for passing to clang.
132   ///
133   /// \param LookupModuleOutput This function is called to fill in
134   ///                           "-fmodule-file=", "-o" and other output
135   ///                           arguments.
136   std::vector<std::string> getCanonicalCommandLine(
137       llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
138           LookupModuleOutput) const;
139 
140   /// Gets the canonical command line suitable for passing to clang, excluding
141   /// "-fmodule-file=" and "-o" arguments.
142   std::vector<std::string> getCanonicalCommandLineWithoutModulePaths() const;
143 };
144 
145 class ModuleDepCollector;
146 
147 /// Callback that records textual includes and direct modular includes/imports
148 /// during preprocessing. At the end of the main file, it also collects
149 /// transitive modular dependencies and passes everything to the
150 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
151 class ModuleDepCollectorPP final : public PPCallbacks {
152 public:
153   ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
154 
155   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
156                    SrcMgr::CharacteristicKind FileType,
157                    FileID PrevFID) override;
158   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
159                           StringRef FileName, bool IsAngled,
160                           CharSourceRange FilenameRange,
161                           Optional<FileEntryRef> File, StringRef SearchPath,
162                           StringRef RelativePath, const Module *Imported,
163                           SrcMgr::CharacteristicKind FileType) override;
164   void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
165                     const Module *Imported) override;
166 
167   void EndOfMainFile() override;
168 
169 private:
170   /// The parent dependency collector.
171   ModuleDepCollector &MDC;
172   /// Working set of direct modular dependencies.
173   llvm::SetVector<const Module *> DirectModularDeps;
174   /// Working set of direct modular dependencies that have already been built.
175   llvm::SetVector<const Module *> DirectPrebuiltModularDeps;
176 
177   void handleImport(const Module *Imported);
178 
179   /// Adds direct modular dependencies that have already been built to the
180   /// ModuleDeps instance.
181   void
182   addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
183                               llvm::DenseSet<const Module *> &SeenSubmodules);
184   void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
185                              llvm::DenseSet<const Module *> &SeenSubmodules);
186 
187   /// Traverses the previously collected direct modular dependencies to discover
188   /// transitive modular dependencies and fills the parent \c ModuleDepCollector
189   /// with both.
190   ModuleID handleTopLevelModule(const Module *M);
191   void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
192                            llvm::DenseSet<const Module *> &AddedModules);
193   void addModuleDep(const Module *M, ModuleDeps &MD,
194                     llvm::DenseSet<const Module *> &AddedModules);
195 };
196 
197 /// Collects modular and non-modular dependencies of the main file by attaching
198 /// \c ModuleDepCollectorPP to the preprocessor.
199 class ModuleDepCollector final : public DependencyCollector {
200 public:
201   ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
202                      CompilerInstance &ScanInstance, DependencyConsumer &C,
203                      CompilerInvocation &&OriginalCI, bool OptimizeArgs);
204 
205   void attachToPreprocessor(Preprocessor &PP) override;
206   void attachToASTReader(ASTReader &R) override;
207 
208 private:
209   friend ModuleDepCollectorPP;
210 
211   /// The compiler instance for scanning the current translation unit.
212   CompilerInstance &ScanInstance;
213   /// The consumer of collected dependency information.
214   DependencyConsumer &Consumer;
215   /// Path to the main source file.
216   std::string MainFile;
217   /// Hash identifying the compilation conditions of the current TU.
218   std::string ContextHash;
219   /// Non-modular file dependencies. This includes the main source file and
220   /// textually included header files.
221   std::vector<std::string> FileDeps;
222   /// Direct and transitive modular dependencies of the main source file.
223   llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
224   /// Options that control the dependency output generation.
225   std::unique_ptr<DependencyOutputOptions> Opts;
226   /// The original Clang invocation passed to dependency scanner.
227   CompilerInvocation OriginalInvocation;
228   /// Whether to optimize the modules' command-line arguments.
229   bool OptimizeArgs;
230 
231   /// Checks whether the module is known as being prebuilt.
232   bool isPrebuiltModule(const Module *M);
233 
234   /// Constructs a CompilerInvocation that can be used to build the given
235   /// module, excluding paths to discovered modular dependencies that are yet to
236   /// be built.
237   CompilerInvocation makeInvocationForModuleBuildWithoutPaths(
238       const ModuleDeps &Deps,
239       llvm::function_ref<void(CompilerInvocation &)> Optimize) const;
240 };
241 
242 } // end namespace dependencies
243 } // end namespace tooling
244 } // end namespace clang
245 
246 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
247