1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Frontend/CompilerInvocation.h" 15 #include "clang/Frontend/Utils.h" 16 #include "clang/Lex/HeaderSearch.h" 17 #include "clang/Lex/PPCallbacks.h" 18 #include "clang/Serialization/ASTReader.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/StringSet.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <string> 23 #include <unordered_map> 24 25 namespace clang { 26 namespace tooling { 27 namespace dependencies { 28 29 class DependencyConsumer; 30 31 /// Modular dependency that has already been built prior to the dependency scan. 32 struct PrebuiltModuleDep { 33 std::string ModuleName; 34 std::string PCMFile; 35 std::string ModuleMapFile; 36 37 explicit PrebuiltModuleDep(const Module *M) 38 : ModuleName(M->getTopLevelModuleName()), 39 PCMFile(M->getASTFile()->getName()), 40 ModuleMapFile(M->PresumedModuleMapFile) {} 41 }; 42 43 /// This is used to identify a specific module. 44 struct ModuleID { 45 /// The name of the module. This may include `:` for C++20 module partitions, 46 /// or a header-name for C++20 header units. 47 std::string ModuleName; 48 49 /// The context hash of a module represents the set of compiler options that 50 /// may make one version of a module incompatible with another. This includes 51 /// things like language mode, predefined macros, header search paths, etc... 52 /// 53 /// Modules with the same name but a different \c ContextHash should be 54 /// treated as separate modules for the purpose of a build. 55 std::string ContextHash; 56 57 bool operator==(const ModuleID &Other) const { 58 return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash; 59 } 60 }; 61 62 struct ModuleIDHasher { 63 std::size_t operator()(const ModuleID &MID) const { 64 return llvm::hash_combine(MID.ModuleName, MID.ContextHash); 65 } 66 }; 67 68 /// An output from a module compilation, such as the path of the module file. 69 enum class ModuleOutputKind { 70 /// The module file (.pcm). Required. 71 ModuleFile, 72 /// The path of the dependency file (.d), if any. 73 DependencyFile, 74 /// The null-separated list of names to use as the targets in the dependency 75 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver. 76 DependencyTargets, 77 /// The path of the serialized diagnostic file (.dia), if any. 78 DiagnosticSerializationFile, 79 }; 80 81 struct ModuleDeps { 82 /// The identifier of the module. 83 ModuleID ID; 84 85 /// Whether this is a "system" module. 86 bool IsSystem; 87 88 /// The path to the modulemap file which defines this module. 89 /// 90 /// This can be used to explicitly build this module. This file will 91 /// additionally appear in \c FileDeps as a dependency. 92 std::string ClangModuleMapFile; 93 94 /// The path to where an implicit build would put the PCM for this module. 95 std::string ImplicitModulePCMPath; 96 97 /// A collection of absolute paths to files that this module directly depends 98 /// on, not including transitive dependencies. 99 llvm::StringSet<> FileDeps; 100 101 /// A collection of absolute paths to module map files that this module needs 102 /// to know about. 103 std::vector<std::string> ModuleMapFileDeps; 104 105 /// A collection of prebuilt modular dependencies this module directly depends 106 /// on, not including transitive dependencies. 107 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; 108 109 /// A list of module identifiers this module directly depends on, not 110 /// including transitive dependencies. 111 /// 112 /// This may include modules with a different context hash when it can be 113 /// determined that the differences are benign for this compilation. 114 std::vector<ModuleID> ClangModuleDeps; 115 116 // Used to track which modules that were discovered were directly imported by 117 // the primary TU. 118 bool ImportedByMainFile = false; 119 120 /// Whether the TU had a dependency file. The path in \c BuildInvocation is 121 /// cleared to avoid leaking the specific path from the TU into the module. 122 bool HadDependencyFile = false; 123 124 /// Whether the TU had serialized diagnostics. The path in \c BuildInvocation 125 /// is cleared to avoid leaking the specific path from the TU into the module. 126 bool HadSerializedDiagnostics = false; 127 128 /// Compiler invocation that can be used to build this module (without paths). 129 CompilerInvocation BuildInvocation; 130 131 /// Gets the canonical command line suitable for passing to clang. 132 /// 133 /// \param LookupModuleOutput This function is called to fill in 134 /// "-fmodule-file=", "-o" and other output 135 /// arguments. 136 std::vector<std::string> getCanonicalCommandLine( 137 llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)> 138 LookupModuleOutput) const; 139 140 /// Gets the canonical command line suitable for passing to clang, excluding 141 /// "-fmodule-file=" and "-o" arguments. 142 std::vector<std::string> getCanonicalCommandLineWithoutModulePaths() const; 143 }; 144 145 class ModuleDepCollector; 146 147 /// Callback that records textual includes and direct modular includes/imports 148 /// during preprocessing. At the end of the main file, it also collects 149 /// transitive modular dependencies and passes everything to the 150 /// \c DependencyConsumer of the parent \c ModuleDepCollector. 151 class ModuleDepCollectorPP final : public PPCallbacks { 152 public: 153 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {} 154 155 void FileChanged(SourceLocation Loc, FileChangeReason Reason, 156 SrcMgr::CharacteristicKind FileType, 157 FileID PrevFID) override; 158 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 159 StringRef FileName, bool IsAngled, 160 CharSourceRange FilenameRange, 161 Optional<FileEntryRef> File, StringRef SearchPath, 162 StringRef RelativePath, const Module *Imported, 163 SrcMgr::CharacteristicKind FileType) override; 164 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, 165 const Module *Imported) override; 166 167 void EndOfMainFile() override; 168 169 private: 170 /// The parent dependency collector. 171 ModuleDepCollector &MDC; 172 /// Working set of direct modular dependencies. 173 llvm::SetVector<const Module *> DirectModularDeps; 174 /// Working set of direct modular dependencies that have already been built. 175 llvm::SetVector<const Module *> DirectPrebuiltModularDeps; 176 177 void handleImport(const Module *Imported); 178 179 /// Adds direct modular dependencies that have already been built to the 180 /// ModuleDeps instance. 181 void 182 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, 183 llvm::DenseSet<const Module *> &SeenSubmodules); 184 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, 185 llvm::DenseSet<const Module *> &SeenSubmodules); 186 187 /// Traverses the previously collected direct modular dependencies to discover 188 /// transitive modular dependencies and fills the parent \c ModuleDepCollector 189 /// with both. 190 ModuleID handleTopLevelModule(const Module *M); 191 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, 192 llvm::DenseSet<const Module *> &AddedModules); 193 void addModuleDep(const Module *M, ModuleDeps &MD, 194 llvm::DenseSet<const Module *> &AddedModules); 195 }; 196 197 /// Collects modular and non-modular dependencies of the main file by attaching 198 /// \c ModuleDepCollectorPP to the preprocessor. 199 class ModuleDepCollector final : public DependencyCollector { 200 public: 201 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts, 202 CompilerInstance &ScanInstance, DependencyConsumer &C, 203 CompilerInvocation &&OriginalCI, bool OptimizeArgs); 204 205 void attachToPreprocessor(Preprocessor &PP) override; 206 void attachToASTReader(ASTReader &R) override; 207 208 private: 209 friend ModuleDepCollectorPP; 210 211 /// The compiler instance for scanning the current translation unit. 212 CompilerInstance &ScanInstance; 213 /// The consumer of collected dependency information. 214 DependencyConsumer &Consumer; 215 /// Path to the main source file. 216 std::string MainFile; 217 /// Hash identifying the compilation conditions of the current TU. 218 std::string ContextHash; 219 /// Non-modular file dependencies. This includes the main source file and 220 /// textually included header files. 221 std::vector<std::string> FileDeps; 222 /// Direct and transitive modular dependencies of the main source file. 223 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps; 224 /// Options that control the dependency output generation. 225 std::unique_ptr<DependencyOutputOptions> Opts; 226 /// The original Clang invocation passed to dependency scanner. 227 CompilerInvocation OriginalInvocation; 228 /// Whether to optimize the modules' command-line arguments. 229 bool OptimizeArgs; 230 231 /// Checks whether the module is known as being prebuilt. 232 bool isPrebuiltModule(const Module *M); 233 234 /// Constructs a CompilerInvocation that can be used to build the given 235 /// module, excluding paths to discovered modular dependencies that are yet to 236 /// be built. 237 CompilerInvocation makeInvocationForModuleBuildWithoutPaths( 238 const ModuleDeps &Deps, 239 llvm::function_ref<void(CompilerInvocation &)> Optimize) const; 240 }; 241 242 } // end namespace dependencies 243 } // end namespace tooling 244 } // end namespace clang 245 246 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 247