1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Frontend/CompilerInvocation.h" 15 #include "clang/Frontend/Utils.h" 16 #include "clang/Lex/HeaderSearch.h" 17 #include "clang/Lex/PPCallbacks.h" 18 #include "clang/Serialization/ASTReader.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/StringSet.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <optional> 23 #include <string> 24 #include <unordered_map> 25 26 namespace clang { 27 namespace tooling { 28 namespace dependencies { 29 30 class DependencyConsumer; 31 32 /// Modular dependency that has already been built prior to the dependency scan. 33 struct PrebuiltModuleDep { 34 std::string ModuleName; 35 std::string PCMFile; 36 std::string ModuleMapFile; 37 PrebuiltModuleDepPrebuiltModuleDep38 explicit PrebuiltModuleDep(const Module *M) 39 : ModuleName(M->getTopLevelModuleName()), 40 PCMFile(M->getASTFile()->getName()), 41 ModuleMapFile(M->PresumedModuleMapFile) {} 42 }; 43 44 /// This is used to identify a specific module. 45 struct ModuleID { 46 /// The name of the module. This may include `:` for C++20 module partitions, 47 /// or a header-name for C++20 header units. 48 std::string ModuleName; 49 50 /// The context hash of a module represents the compiler options that affect 51 /// the resulting command-line invocation. 52 /// 53 /// Modules with the same name and ContextHash but different invocations could 54 /// cause non-deterministic build results. 55 /// 56 /// Modules with the same name but a different \c ContextHash should be 57 /// treated as separate modules for the purpose of a build. 58 std::string ContextHash; 59 60 bool operator==(const ModuleID &Other) const { 61 return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash; 62 } 63 }; 64 65 /// P1689ModuleInfo - Represents the needed information of standard C++20 66 /// modules for P1689 format. 67 struct P1689ModuleInfo { 68 /// The name of the module. This may include `:` for partitions. 69 std::string ModuleName; 70 71 /// Optional. The source path to the module. 72 std::string SourcePath; 73 74 /// If this module is a standard c++ interface unit. 75 bool IsStdCXXModuleInterface = true; 76 77 enum class ModuleType { 78 NamedCXXModule 79 // To be supported 80 // AngleHeaderUnit, 81 // QuoteHeaderUnit 82 }; 83 ModuleType Type = ModuleType::NamedCXXModule; 84 }; 85 86 /// An output from a module compilation, such as the path of the module file. 87 enum class ModuleOutputKind { 88 /// The module file (.pcm). Required. 89 ModuleFile, 90 /// The path of the dependency file (.d), if any. 91 DependencyFile, 92 /// The null-separated list of names to use as the targets in the dependency 93 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver. 94 DependencyTargets, 95 /// The path of the serialized diagnostic file (.dia), if any. 96 DiagnosticSerializationFile, 97 }; 98 99 struct ModuleDeps { 100 /// The identifier of the module. 101 ModuleID ID; 102 103 /// Whether this is a "system" module. 104 bool IsSystem; 105 106 /// The path to the modulemap file which defines this module. 107 /// 108 /// This can be used to explicitly build this module. This file will 109 /// additionally appear in \c FileDeps as a dependency. 110 std::string ClangModuleMapFile; 111 112 /// A collection of absolute paths to files that this module directly depends 113 /// on, not including transitive dependencies. 114 llvm::StringSet<> FileDeps; 115 116 /// A collection of absolute paths to module map files that this module needs 117 /// to know about. The ordering is significant. 118 std::vector<std::string> ModuleMapFileDeps; 119 120 /// A collection of prebuilt modular dependencies this module directly depends 121 /// on, not including transitive dependencies. 122 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; 123 124 /// A list of module identifiers this module directly depends on, not 125 /// including transitive dependencies. 126 /// 127 /// This may include modules with a different context hash when it can be 128 /// determined that the differences are benign for this compilation. 129 std::vector<ModuleID> ClangModuleDeps; 130 131 // Used to track which modules that were discovered were directly imported by 132 // the primary TU. 133 bool ImportedByMainFile = false; 134 135 /// Compiler invocation that can be used to build this module. Does not 136 /// include argv[0]. 137 std::vector<std::string> BuildArguments; 138 }; 139 140 class ModuleDepCollector; 141 142 /// Callback that records textual includes and direct modular includes/imports 143 /// during preprocessing. At the end of the main file, it also collects 144 /// transitive modular dependencies and passes everything to the 145 /// \c DependencyConsumer of the parent \c ModuleDepCollector. 146 class ModuleDepCollectorPP final : public PPCallbacks { 147 public: ModuleDepCollectorPP(ModuleDepCollector & MDC)148 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {} 149 150 void FileChanged(SourceLocation Loc, FileChangeReason Reason, 151 SrcMgr::CharacteristicKind FileType, 152 FileID PrevFID) override; 153 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 154 StringRef FileName, bool IsAngled, 155 CharSourceRange FilenameRange, 156 OptionalFileEntryRef File, StringRef SearchPath, 157 StringRef RelativePath, const Module *Imported, 158 SrcMgr::CharacteristicKind FileType) override; 159 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, 160 const Module *Imported) override; 161 162 void EndOfMainFile() override; 163 164 private: 165 /// The parent dependency collector. 166 ModuleDepCollector &MDC; 167 /// Working set of direct modular dependencies. 168 llvm::SetVector<const Module *> DirectModularDeps; 169 170 void handleImport(const Module *Imported); 171 172 /// Adds direct modular dependencies that have already been built to the 173 /// ModuleDeps instance. 174 void 175 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, 176 llvm::DenseSet<const Module *> &SeenSubmodules); 177 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, 178 llvm::DenseSet<const Module *> &SeenSubmodules); 179 180 /// Traverses the previously collected direct modular dependencies to discover 181 /// transitive modular dependencies and fills the parent \c ModuleDepCollector 182 /// with both. 183 /// Returns the ID or nothing if the dependency is spurious and is ignored. 184 std::optional<ModuleID> handleTopLevelModule(const Module *M); 185 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, 186 llvm::DenseSet<const Module *> &AddedModules); 187 void addModuleDep(const Module *M, ModuleDeps &MD, 188 llvm::DenseSet<const Module *> &AddedModules); 189 190 /// Traverses the affecting modules and updates \c MD with references to the 191 /// parent \c ModuleDepCollector info. 192 void addAllAffectingClangModules(const Module *M, ModuleDeps &MD, 193 llvm::DenseSet<const Module *> &AddedModules); 194 void addAffectingClangModule(const Module *M, ModuleDeps &MD, 195 llvm::DenseSet<const Module *> &AddedModules); 196 }; 197 198 /// Collects modular and non-modular dependencies of the main file by attaching 199 /// \c ModuleDepCollectorPP to the preprocessor. 200 class ModuleDepCollector final : public DependencyCollector { 201 public: 202 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts, 203 CompilerInstance &ScanInstance, DependencyConsumer &C, 204 CompilerInvocation OriginalCI, bool OptimizeArgs, 205 bool EagerLoadModules, bool IsStdModuleP1689Format); 206 207 void attachToPreprocessor(Preprocessor &PP) override; 208 void attachToASTReader(ASTReader &R) override; 209 210 /// Apply any changes implied by the discovered dependencies to the given 211 /// invocation, (e.g. disable implicit modules, add explicit module paths). 212 void applyDiscoveredDependencies(CompilerInvocation &CI); 213 214 private: 215 friend ModuleDepCollectorPP; 216 217 /// The compiler instance for scanning the current translation unit. 218 CompilerInstance &ScanInstance; 219 /// The consumer of collected dependency information. 220 DependencyConsumer &Consumer; 221 /// Path to the main source file. 222 std::string MainFile; 223 /// Hash identifying the compilation conditions of the current TU. 224 std::string ContextHash; 225 /// Non-modular file dependencies. This includes the main source file and 226 /// textually included header files. 227 std::vector<std::string> FileDeps; 228 /// Direct and transitive modular dependencies of the main source file. 229 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps; 230 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without 231 /// a preprocessor. Storage owned by \c ModularDeps. 232 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID; 233 /// Direct modular dependencies that have already been built. 234 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps; 235 /// Options that control the dependency output generation. 236 std::unique_ptr<DependencyOutputOptions> Opts; 237 /// The original Clang invocation passed to dependency scanner. 238 CompilerInvocation OriginalInvocation; 239 /// Whether to optimize the modules' command-line arguments. 240 bool OptimizeArgs; 241 /// Whether to set up command-lines to load PCM files eagerly. 242 bool EagerLoadModules; 243 /// If we're generating dependency output in P1689 format 244 /// for standard C++ modules. 245 bool IsStdModuleP1689Format; 246 247 std::optional<P1689ModuleInfo> ProvidedStdCXXModule; 248 std::vector<P1689ModuleInfo> RequiredStdCXXModules; 249 250 /// Checks whether the module is known as being prebuilt. 251 bool isPrebuiltModule(const Module *M); 252 253 /// Adds \p Path to \c FileDeps, making it absolute if necessary. 254 void addFileDep(StringRef Path); 255 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary. 256 void addFileDep(ModuleDeps &MD, StringRef Path); 257 258 /// Constructs a CompilerInvocation that can be used to build the given 259 /// module, excluding paths to discovered modular dependencies that are yet to 260 /// be built. 261 CompilerInvocation makeInvocationForModuleBuildWithoutOutputs( 262 const ModuleDeps &Deps, 263 llvm::function_ref<void(CompilerInvocation &)> Optimize) const; 264 265 /// Collect module map files for given modules. 266 llvm::DenseSet<const FileEntry *> 267 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const; 268 269 /// Add module map files to the invocation, if needed. 270 void addModuleMapFiles(CompilerInvocation &CI, 271 ArrayRef<ModuleID> ClangModuleDeps) const; 272 /// Add module files (pcm) to the invocation, if needed. 273 void addModuleFiles(CompilerInvocation &CI, 274 ArrayRef<ModuleID> ClangModuleDeps) const; 275 276 /// Add paths that require looking up outputs to the given dependencies. 277 void addOutputPaths(CompilerInvocation &CI, ModuleDeps &Deps); 278 279 /// Compute the context hash for \p Deps, and create the mapping 280 /// \c ModuleDepsByID[Deps.ID] = &Deps. 281 void associateWithContextHash(const CompilerInvocation &CI, ModuleDeps &Deps); 282 }; 283 284 } // end namespace dependencies 285 } // end namespace tooling 286 } // end namespace clang 287 288 namespace llvm { 289 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> { 290 using ModuleID = clang::tooling::dependencies::ModuleID; 291 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; } 292 static inline ModuleID getTombstoneKey() { 293 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash 294 } 295 static unsigned getHashValue(const ModuleID &ID) { 296 return hash_combine(ID.ModuleName, ID.ContextHash); 297 } 298 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) { 299 return LHS == RHS; 300 } 301 }; 302 } // namespace llvm 303 304 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 305