1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Frontend/CompilerInvocation.h" 15 #include "clang/Frontend/Utils.h" 16 #include "clang/Lex/HeaderSearch.h" 17 #include "clang/Lex/PPCallbacks.h" 18 #include "clang/Serialization/ASTReader.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/StringSet.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <optional> 23 #include <string> 24 #include <unordered_map> 25 26 namespace clang { 27 namespace tooling { 28 namespace dependencies { 29 30 class DependencyActionController; 31 class DependencyConsumer; 32 33 /// Modular dependency that has already been built prior to the dependency scan. 34 struct PrebuiltModuleDep { 35 std::string ModuleName; 36 std::string PCMFile; 37 std::string ModuleMapFile; 38 39 explicit PrebuiltModuleDep(const Module *M) 40 : ModuleName(M->getTopLevelModuleName()), 41 PCMFile(M->getASTFile()->getName()), 42 ModuleMapFile(M->PresumedModuleMapFile) {} 43 }; 44 45 /// This is used to identify a specific module. 46 struct ModuleID { 47 /// The name of the module. This may include `:` for C++20 module partitions, 48 /// or a header-name for C++20 header units. 49 std::string ModuleName; 50 51 /// The context hash of a module represents the compiler options that affect 52 /// the resulting command-line invocation. 
53 /// 54 /// Modules with the same name and ContextHash but different invocations could 55 /// cause non-deterministic build results. 56 /// 57 /// Modules with the same name but a different \c ContextHash should be 58 /// treated as separate modules for the purpose of a build. 59 std::string ContextHash; 60 61 bool operator==(const ModuleID &Other) const { 62 return std::tie(ModuleName, ContextHash) == 63 std::tie(Other.ModuleName, Other.ContextHash); 64 } 65 66 bool operator<(const ModuleID& Other) const { 67 return std::tie(ModuleName, ContextHash) < 68 std::tie(Other.ModuleName, Other.ContextHash); 69 } 70 }; 71 72 /// P1689ModuleInfo - Represents the needed information of standard C++20 73 /// modules for P1689 format. 74 struct P1689ModuleInfo { 75 /// The name of the module. This may include `:` for partitions. 76 std::string ModuleName; 77 78 /// Optional. The source path to the module. 79 std::string SourcePath; 80 81 /// If this module is a standard c++ interface unit. 82 bool IsStdCXXModuleInterface = true; 83 84 enum class ModuleType { 85 NamedCXXModule 86 // To be supported 87 // AngleHeaderUnit, 88 // QuoteHeaderUnit 89 }; 90 ModuleType Type = ModuleType::NamedCXXModule; 91 }; 92 93 /// An output from a module compilation, such as the path of the module file. 94 enum class ModuleOutputKind { 95 /// The module file (.pcm). Required. 96 ModuleFile, 97 /// The path of the dependency file (.d), if any. 98 DependencyFile, 99 /// The null-separated list of names to use as the targets in the dependency 100 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver. 101 DependencyTargets, 102 /// The path of the serialized diagnostic file (.dia), if any. 103 DiagnosticSerializationFile, 104 }; 105 106 struct ModuleDeps { 107 /// The identifier of the module. 108 ModuleID ID; 109 110 /// Whether this is a "system" module. 111 bool IsSystem; 112 113 /// The path to the modulemap file which defines this module. 
114 /// 115 /// This can be used to explicitly build this module. This file will 116 /// additionally appear in \c FileDeps as a dependency. 117 std::string ClangModuleMapFile; 118 119 /// A collection of absolute paths to files that this module directly depends 120 /// on, not including transitive dependencies. 121 llvm::StringSet<> FileDeps; 122 123 /// A collection of absolute paths to module map files that this module needs 124 /// to know about. The ordering is significant. 125 std::vector<std::string> ModuleMapFileDeps; 126 127 /// A collection of prebuilt modular dependencies this module directly depends 128 /// on, not including transitive dependencies. 129 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; 130 131 /// A list of module identifiers this module directly depends on, not 132 /// including transitive dependencies. 133 /// 134 /// This may include modules with a different context hash when it can be 135 /// determined that the differences are benign for this compilation. 136 std::vector<ModuleID> ClangModuleDeps; 137 138 // Used to track which modules that were discovered were directly imported by 139 // the primary TU. 140 bool ImportedByMainFile = false; 141 142 /// Compiler invocation that can be used to build this module. Does not 143 /// include argv[0]. 144 std::vector<std::string> BuildArguments; 145 }; 146 147 class ModuleDepCollector; 148 149 /// Callback that records textual includes and direct modular includes/imports 150 /// during preprocessing. At the end of the main file, it also collects 151 /// transitive modular dependencies and passes everything to the 152 /// \c DependencyConsumer of the parent \c ModuleDepCollector. 
class ModuleDepCollectorPP final : public PPCallbacks {
public:
  /// Binds the callbacks to their parent collector. Only a reference to
  /// \p MDC is stored.
  // NOTE(review): this single-argument constructor is not `explicit`;
  // consider marking it so once all call sites have been checked.
  ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}

  // PPCallbacks overrides. Only declared here; the implementations live out
  // of line.
  void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
                        SrcMgr::CharacteristicKind FileType, FileID PrevFID,
                        SourceLocation Loc) override;
  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                          StringRef FileName, bool IsAngled,
                          CharSourceRange FilenameRange,
                          OptionalFileEntryRef File, StringRef SearchPath,
                          StringRef RelativePath, const Module *Imported,
                          SrcMgr::CharacteristicKind FileType) override;
  void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
                    const Module *Imported) override;

  void EndOfMainFile() override;

private:
  /// The parent dependency collector.
  ModuleDepCollector &MDC;
  /// Working set of direct modular dependencies.
  llvm::SetVector<const Module *> DirectModularDeps;

  /// Presumably the shared handler for a module reached through either an
  /// inclusion directive or an import — confirm against the implementation.
  void handleImport(const Module *Imported);

  /// Adds direct modular dependencies that have already been built to the
  /// ModuleDeps instance.
  void
  addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
                              llvm::DenseSet<const Module *> &SeenSubmodules);
  void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
                             llvm::DenseSet<const Module *> &SeenSubmodules);

  /// Traverses the previously collected direct modular dependencies to discover
  /// transitive modular dependencies and fills the parent \c ModuleDepCollector
  /// with both.
  /// Returns the ID or nothing if the dependency is spurious and is ignored.
  std::optional<ModuleID> handleTopLevelModule(const Module *M);
  void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
                           llvm::DenseSet<const Module *> &AddedModules);
  void addModuleDep(const Module *M, ModuleDeps &MD,
                    llvm::DenseSet<const Module *> &AddedModules);

  /// Traverses the affecting modules and updates \c MD with references to the
  /// parent \c ModuleDepCollector info.
  void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
                                   llvm::DenseSet<const Module *> &AddedModules);
  void addAffectingClangModule(const Module *M, ModuleDeps &MD,
                               llvm::DenseSet<const Module *> &AddedModules);
};

/// Collects modular and non-modular dependencies of the main file by attaching
/// \c ModuleDepCollectorPP to the preprocessor.
class ModuleDepCollector final : public DependencyCollector {
public:
  /// Takes ownership of \p Opts; \p OriginalCI is taken by value. The
  /// collector declares reference members for the scan instance, the consumer
  /// and the controller (see below). The constructor itself is defined out of
  /// line.
  ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
                     CompilerInstance &ScanInstance, DependencyConsumer &C,
                     DependencyActionController &Controller,
                     CompilerInvocation OriginalCI, bool OptimizeArgs,
                     bool EagerLoadModules, bool IsStdModuleP1689Format);

  // DependencyCollector overrides.
  void attachToPreprocessor(Preprocessor &PP) override;
  void attachToASTReader(ASTReader &R) override;

  /// Apply any changes implied by the discovered dependencies to the given
  /// invocation, (e.g. disable implicit modules, add explicit module paths).
  void applyDiscoveredDependencies(CompilerInvocation &CI);

private:
  // Grants the preprocessor callbacks access to the private members below.
  friend ModuleDepCollectorPP;

  /// The compiler instance for scanning the current translation unit.
  CompilerInstance &ScanInstance;
  /// The consumer of collected dependency information.
  DependencyConsumer &Consumer;
  /// Callbacks for computing dependency information.
  DependencyActionController &Controller;
  /// Path to the main source file.
  std::string MainFile;
  /// Hash identifying the compilation conditions of the current TU.
  std::string ContextHash;
  /// Non-modular file dependencies. This includes the main source file and
  /// textually included header files.
  std::vector<std::string> FileDeps;
  /// Direct and transitive modular dependencies of the main source file.
  llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
  /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
  /// a preprocessor. Storage owned by \c ModularDeps.
  llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
  /// Direct modular dependencies that have already been built.
  llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
  /// Options that control the dependency output generation.
  std::unique_ptr<DependencyOutputOptions> Opts;
  /// The original Clang invocation passed to dependency scanner.
  CompilerInvocation OriginalInvocation;
  /// Whether to optimize the modules' command-line arguments.
  bool OptimizeArgs;
  /// Whether to set up command-lines to load PCM files eagerly.
  bool EagerLoadModules;
  /// If we're generating dependency output in P1689 format
  /// for standard C++ modules.
  bool IsStdModuleP1689Format;

  /// Presumably the standard C++ module provided by the current TU, if any,
  /// for P1689 output — confirm against the implementation.
  std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
  /// Presumably the standard C++ modules required by the current TU, for
  /// P1689 output — confirm against the implementation.
  std::vector<P1689ModuleInfo> RequiredStdCXXModules;

  /// Checks whether the module is known as being prebuilt.
  bool isPrebuiltModule(const Module *M);

  /// Adds \p Path to \c FileDeps, making it absolute if necessary.
  void addFileDep(StringRef Path);
  /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
  void addFileDep(ModuleDeps &MD, StringRef Path);

  /// Constructs a CompilerInvocation that can be used to build the given
  /// module, excluding paths to discovered modular dependencies that are yet to
  /// be built.
  CompilerInvocation makeInvocationForModuleBuildWithoutOutputs(
      const ModuleDeps &Deps,
      llvm::function_ref<void(CompilerInvocation &)> Optimize) const;

  /// Collect module map files for given modules.
  llvm::DenseSet<const FileEntry *>
  collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;

  /// Add module map files to the invocation, if needed.
  void addModuleMapFiles(CompilerInvocation &CI,
                         ArrayRef<ModuleID> ClangModuleDeps) const;
  /// Add module files (pcm) to the invocation, if needed.
  void addModuleFiles(CompilerInvocation &CI,
                      ArrayRef<ModuleID> ClangModuleDeps) const;

  /// Add paths that require looking up outputs to the given dependencies.
  void addOutputPaths(CompilerInvocation &CI, ModuleDeps &Deps);

  /// Compute the context hash for \p Deps, and create the mapping
  /// \c ModuleDepsByID[Deps.ID] = &Deps.
  void associateWithContextHash(const CompilerInvocation &CI, ModuleDeps &Deps);
};

} // end namespace dependencies
} // end namespace tooling
} // end namespace clang

namespace llvm {
/// DenseMapInfo specialization for \c ModuleID, providing the empty key,
/// tombstone key, hash and equality that \c llvm::DenseMap needs to use it as
/// a key (as \c ModuleDepsByID does).
template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
  using ModuleID = clang::tooling::dependencies::ModuleID;
  /// The empty key is the ID whose name and context hash are both empty.
  static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
  /// The tombstone key uses "~" for both fields.
  static inline ModuleID getTombstoneKey() {
    return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
  }
  /// Hashes the module name together with the context hash.
  static unsigned getHashValue(const ModuleID &ID) {
    return hash_combine(ID.ModuleName, ID.ContextHash);
  }
  /// Delegates to \c ModuleID::operator==.
  static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
    return LHS == RHS;
  }
};
} // namespace llvm

#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H