1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11 
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Frontend/CompilerInvocation.h"
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Lex/HeaderSearch.h"
17 #include "clang/Lex/PPCallbacks.h"
18 #include "clang/Serialization/ASTReader.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <string>
23 #include <unordered_map>
24 
25 namespace clang {
26 namespace tooling {
27 namespace dependencies {
28 
29 class DependencyConsumer;
30 
31 /// Modular dependency that has already been built prior to the dependency scan.
32 struct PrebuiltModuleDep {
33   std::string ModuleName;
34   std::string PCMFile;
35   std::string ModuleMapFile;
36 
37   explicit PrebuiltModuleDep(const Module *M)
38       : ModuleName(M->getTopLevelModuleName()),
39         PCMFile(M->getASTFile()->getName()),
40         ModuleMapFile(M->PresumedModuleMapFile) {}
41 };
42 
/// Identifies one specific module.
struct ModuleID {
  /// Name of the module. For C++20 module partitions the name may contain a
  /// `:`, and for C++20 header units it may be a header-name.
  std::string ModuleName;

  /// Hash standing for the set of compiler options that may make one version
  /// of a module incompatible with another, such as the language mode,
  /// predefined macros and header search paths.
  ///
  /// For the purpose of a build, modules that share a name but differ in
  /// \c ContextHash should be treated as separate modules.
  std::string ContextHash;

  /// Two identifiers compare equal only when both the module name and the
  /// context hash match.
  bool operator==(const ModuleID &Other) const {
    if (ModuleName != Other.ModuleName)
      return false;
    return ContextHash == Other.ContextHash;
  }
};
61 
62 struct ModuleIDHasher {
63   std::size_t operator()(const ModuleID &MID) const {
64     return llvm::hash_combine(MID.ModuleName, MID.ContextHash);
65   }
66 };
67 
68 struct ModuleDeps {
69   /// The identifier of the module.
70   ModuleID ID;
71 
72   /// Whether this is a "system" module.
73   bool IsSystem;
74 
75   /// The path to the modulemap file which defines this module.
76   ///
77   /// This can be used to explicitly build this module. This file will
78   /// additionally appear in \c FileDeps as a dependency.
79   std::string ClangModuleMapFile;
80 
81   /// The path to where an implicit build would put the PCM for this module.
82   std::string ImplicitModulePCMPath;
83 
84   /// A collection of absolute paths to files that this module directly depends
85   /// on, not including transitive dependencies.
86   llvm::StringSet<> FileDeps;
87 
88   /// A collection of absolute paths to module map files that this module needs
89   /// to know about.
90   std::vector<std::string> ModuleMapFileDeps;
91 
92   /// A collection of prebuilt modular dependencies this module directly depends
93   /// on, not including transitive dependencies.
94   std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
95 
96   /// A list of module identifiers this module directly depends on, not
97   /// including transitive dependencies.
98   ///
99   /// This may include modules with a different context hash when it can be
100   /// determined that the differences are benign for this compilation.
101   std::vector<ModuleID> ClangModuleDeps;
102 
103   // Used to track which modules that were discovered were directly imported by
104   // the primary TU.
105   bool ImportedByMainFile = false;
106 
107   /// Compiler invocation that can be used to build this module (without paths).
108   CompilerInvocation BuildInvocation;
109 
110   /// Gets the canonical command line suitable for passing to clang.
111   ///
112   /// \param LookupPCMPath This function is called to fill in "-fmodule-file="
113   ///                      arguments and the "-o" argument. It needs to return
114   ///                      a path for where the PCM for the given module is to
115   ///                      be located.
116   std::vector<std::string> getCanonicalCommandLine(
117       std::function<StringRef(ModuleID)> LookupPCMPath) const;
118 
119   /// Gets the canonical command line suitable for passing to clang, excluding
120   /// "-fmodule-file=" and "-o" arguments.
121   std::vector<std::string> getCanonicalCommandLineWithoutModulePaths() const;
122 };
123 
124 class ModuleDepCollector;
125 
126 /// Callback that records textual includes and direct modular includes/imports
127 /// during preprocessing. At the end of the main file, it also collects
128 /// transitive modular dependencies and passes everything to the
129 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
130 class ModuleDepCollectorPP final : public PPCallbacks {
131 public:
132   ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
133 
134   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
135                    SrcMgr::CharacteristicKind FileType,
136                    FileID PrevFID) override;
137   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
138                           StringRef FileName, bool IsAngled,
139                           CharSourceRange FilenameRange,
140                           Optional<FileEntryRef> File, StringRef SearchPath,
141                           StringRef RelativePath, const Module *Imported,
142                           SrcMgr::CharacteristicKind FileType) override;
143   void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
144                     const Module *Imported) override;
145 
146   void EndOfMainFile() override;
147 
148 private:
149   /// The parent dependency collector.
150   ModuleDepCollector &MDC;
151   /// Working set of direct modular dependencies.
152   llvm::SetVector<const Module *> DirectModularDeps;
153   /// Working set of direct modular dependencies that have already been built.
154   llvm::SetVector<const Module *> DirectPrebuiltModularDeps;
155 
156   void handleImport(const Module *Imported);
157 
158   /// Adds direct modular dependencies that have already been built to the
159   /// ModuleDeps instance.
160   void
161   addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
162                               llvm::DenseSet<const Module *> &SeenSubmodules);
163   void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
164                              llvm::DenseSet<const Module *> &SeenSubmodules);
165 
166   /// Traverses the previously collected direct modular dependencies to discover
167   /// transitive modular dependencies and fills the parent \c ModuleDepCollector
168   /// with both.
169   ModuleID handleTopLevelModule(const Module *M);
170   void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
171                            llvm::DenseSet<const Module *> &AddedModules);
172   void addModuleDep(const Module *M, ModuleDeps &MD,
173                     llvm::DenseSet<const Module *> &AddedModules);
174 };
175 
176 /// Collects modular and non-modular dependencies of the main file by attaching
177 /// \c ModuleDepCollectorPP to the preprocessor.
178 class ModuleDepCollector final : public DependencyCollector {
179 public:
180   ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
181                      CompilerInstance &ScanInstance, DependencyConsumer &C,
182                      CompilerInvocation &&OriginalCI, bool OptimizeArgs);
183 
184   void attachToPreprocessor(Preprocessor &PP) override;
185   void attachToASTReader(ASTReader &R) override;
186 
187 private:
188   friend ModuleDepCollectorPP;
189 
190   /// The compiler instance for scanning the current translation unit.
191   CompilerInstance &ScanInstance;
192   /// The consumer of collected dependency information.
193   DependencyConsumer &Consumer;
194   /// Path to the main source file.
195   std::string MainFile;
196   /// Hash identifying the compilation conditions of the current TU.
197   std::string ContextHash;
198   /// Non-modular file dependencies. This includes the main source file and
199   /// textually included header files.
200   std::vector<std::string> FileDeps;
201   /// Direct and transitive modular dependencies of the main source file.
202   llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
203   /// Options that control the dependency output generation.
204   std::unique_ptr<DependencyOutputOptions> Opts;
205   /// The original Clang invocation passed to dependency scanner.
206   CompilerInvocation OriginalInvocation;
207   /// Whether to optimize the modules' command-line arguments.
208   bool OptimizeArgs;
209 
210   /// Checks whether the module is known as being prebuilt.
211   bool isPrebuiltModule(const Module *M);
212 
213   /// Constructs a CompilerInvocation that can be used to build the given
214   /// module, excluding paths to discovered modular dependencies that are yet to
215   /// be built.
216   CompilerInvocation makeInvocationForModuleBuildWithoutPaths(
217       const ModuleDeps &Deps,
218       llvm::function_ref<void(CompilerInvocation &)> Optimize) const;
219 };
220 
221 } // end namespace dependencies
222 } // end namespace tooling
223 } // end namespace clang
224 
225 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
226