1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Frontend/CompilerInvocation.h" 15 #include "clang/Frontend/Utils.h" 16 #include "clang/Lex/HeaderSearch.h" 17 #include "clang/Lex/PPCallbacks.h" 18 #include "clang/Serialization/ASTReader.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/StringSet.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <string> 23 #include <unordered_map> 24 25 namespace clang { 26 namespace tooling { 27 namespace dependencies { 28 29 class DependencyConsumer; 30 31 /// Modular dependency that has already been built prior to the dependency scan. 32 struct PrebuiltModuleDep { 33 std::string ModuleName; 34 std::string PCMFile; 35 std::string ModuleMapFile; 36 37 explicit PrebuiltModuleDep(const Module *M) 38 : ModuleName(M->getTopLevelModuleName()), 39 PCMFile(M->getASTFile()->getName()), 40 ModuleMapFile(M->PresumedModuleMapFile) {} 41 }; 42 43 /// This is used to identify a specific module. 44 struct ModuleID { 45 /// The name of the module. This may include `:` for C++20 module partitions, 46 /// or a header-name for C++20 header units. 47 std::string ModuleName; 48 49 /// The context hash of a module represents the set of compiler options that 50 /// may make one version of a module incompatible with another. This includes 51 /// things like language mode, predefined macros, header search paths, etc... 52 /// 53 /// Modules with the same name but a different \c ContextHash should be 54 /// treated as separate modules for the purpose of a build. 55 std::string ContextHash; 56 57 bool operator==(const ModuleID &Other) const { 58 return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash; 59 } 60 }; 61 62 struct ModuleIDHasher { 63 std::size_t operator()(const ModuleID &MID) const { 64 return llvm::hash_combine(MID.ModuleName, MID.ContextHash); 65 } 66 }; 67 68 struct ModuleDeps { 69 /// The identifier of the module. 70 ModuleID ID; 71 72 /// Whether this is a "system" module. 73 bool IsSystem; 74 75 /// The path to the modulemap file which defines this module. 76 /// 77 /// This can be used to explicitly build this module. This file will 78 /// additionally appear in \c FileDeps as a dependency. 79 std::string ClangModuleMapFile; 80 81 /// The path to where an implicit build would put the PCM for this module. 82 std::string ImplicitModulePCMPath; 83 84 /// A collection of absolute paths to files that this module directly depends 85 /// on, not including transitive dependencies. 86 llvm::StringSet<> FileDeps; 87 88 /// A collection of absolute paths to module map files that this module needs 89 /// to know about. 90 std::vector<std::string> ModuleMapFileDeps; 91 92 /// A collection of prebuilt modular dependencies this module directly depends 93 /// on, not including transitive dependencies. 94 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps; 95 96 /// A list of module identifiers this module directly depends on, not 97 /// including transitive dependencies. 98 /// 99 /// This may include modules with a different context hash when it can be 100 /// determined that the differences are benign for this compilation. 101 std::vector<ModuleID> ClangModuleDeps; 102 103 // Used to track which modules that were discovered were directly imported by 104 // the primary TU. 105 bool ImportedByMainFile = false; 106 107 /// Compiler invocation that can be used to build this module (without paths). 108 CompilerInvocation BuildInvocation; 109 110 /// Gets the canonical command line suitable for passing to clang. 111 /// 112 /// \param LookupPCMPath This function is called to fill in "-fmodule-file=" 113 /// arguments and the "-o" argument. It needs to return 114 /// a path for where the PCM for the given module is to 115 /// be located. 116 std::vector<std::string> getCanonicalCommandLine( 117 std::function<StringRef(ModuleID)> LookupPCMPath) const; 118 119 /// Gets the canonical command line suitable for passing to clang, excluding 120 /// "-fmodule-file=" and "-o" arguments. 121 std::vector<std::string> getCanonicalCommandLineWithoutModulePaths() const; 122 }; 123 124 class ModuleDepCollector; 125 126 /// Callback that records textual includes and direct modular includes/imports 127 /// during preprocessing. At the end of the main file, it also collects 128 /// transitive modular dependencies and passes everything to the 129 /// \c DependencyConsumer of the parent \c ModuleDepCollector. 130 class ModuleDepCollectorPP final : public PPCallbacks { 131 public: 132 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {} 133 134 void FileChanged(SourceLocation Loc, FileChangeReason Reason, 135 SrcMgr::CharacteristicKind FileType, 136 FileID PrevFID) override; 137 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 138 StringRef FileName, bool IsAngled, 139 CharSourceRange FilenameRange, 140 Optional<FileEntryRef> File, StringRef SearchPath, 141 StringRef RelativePath, const Module *Imported, 142 SrcMgr::CharacteristicKind FileType) override; 143 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, 144 const Module *Imported) override; 145 146 void EndOfMainFile() override; 147 148 private: 149 /// The parent dependency collector. 150 ModuleDepCollector &MDC; 151 /// Working set of direct modular dependencies. 152 llvm::SetVector<const Module *> DirectModularDeps; 153 /// Working set of direct modular dependencies that have already been built. 154 llvm::SetVector<const Module *> DirectPrebuiltModularDeps; 155 156 void handleImport(const Module *Imported); 157 158 /// Adds direct modular dependencies that have already been built to the 159 /// ModuleDeps instance. 160 void 161 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, 162 llvm::DenseSet<const Module *> &SeenSubmodules); 163 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, 164 llvm::DenseSet<const Module *> &SeenSubmodules); 165 166 /// Traverses the previously collected direct modular dependencies to discover 167 /// transitive modular dependencies and fills the parent \c ModuleDepCollector 168 /// with both. 169 ModuleID handleTopLevelModule(const Module *M); 170 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, 171 llvm::DenseSet<const Module *> &AddedModules); 172 void addModuleDep(const Module *M, ModuleDeps &MD, 173 llvm::DenseSet<const Module *> &AddedModules); 174 }; 175 176 /// Collects modular and non-modular dependencies of the main file by attaching 177 /// \c ModuleDepCollectorPP to the preprocessor. 178 class ModuleDepCollector final : public DependencyCollector { 179 public: 180 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts, 181 CompilerInstance &ScanInstance, DependencyConsumer &C, 182 CompilerInvocation &&OriginalCI, bool OptimizeArgs); 183 184 void attachToPreprocessor(Preprocessor &PP) override; 185 void attachToASTReader(ASTReader &R) override; 186 187 private: 188 friend ModuleDepCollectorPP; 189 190 /// The compiler instance for scanning the current translation unit. 191 CompilerInstance &ScanInstance; 192 /// The consumer of collected dependency information. 193 DependencyConsumer &Consumer; 194 /// Path to the main source file. 195 std::string MainFile; 196 /// Hash identifying the compilation conditions of the current TU. 197 std::string ContextHash; 198 /// Non-modular file dependencies. This includes the main source file and 199 /// textually included header files. 200 std::vector<std::string> FileDeps; 201 /// Direct and transitive modular dependencies of the main source file. 202 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps; 203 /// Options that control the dependency output generation. 204 std::unique_ptr<DependencyOutputOptions> Opts; 205 /// The original Clang invocation passed to dependency scanner. 206 CompilerInvocation OriginalInvocation; 207 /// Whether to optimize the modules' command-line arguments. 208 bool OptimizeArgs; 209 210 /// Checks whether the module is known as being prebuilt. 211 bool isPrebuiltModule(const Module *M); 212 213 /// Constructs a CompilerInvocation that can be used to build the given 214 /// module, excluding paths to discovered modular dependencies that are yet to 215 /// be built. 216 CompilerInvocation makeInvocationForModuleBuildWithoutPaths( 217 const ModuleDeps &Deps, 218 llvm::function_ref<void(CompilerInvocation &)> Optimize) const; 219 }; 220 221 } // end namespace dependencies 222 } // end namespace tooling 223 } // end namespace clang 224 225 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H 226