1 //===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Frontend/CompilerInstance.h" 10 #include "clang/Tooling/CommonOptionsParser.h" 11 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" 12 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" 13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" 14 #include "clang/Tooling/JSONCompilationDatabase.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/Twine.h" 17 #include "llvm/Support/CommandLine.h" 18 #include "llvm/Support/FileUtilities.h" 19 #include "llvm/Support/InitLLVM.h" 20 #include "llvm/Support/JSON.h" 21 #include "llvm/Support/Program.h" 22 #include "llvm/Support/Signals.h" 23 #include "llvm/Support/ThreadPool.h" 24 #include "llvm/Support/Threading.h" 25 #include <mutex> 26 #include <thread> 27 28 using namespace clang; 29 using namespace tooling::dependencies; 30 31 namespace { 32 33 class SharedStream { 34 public: 35 SharedStream(raw_ostream &OS) : OS(OS) {} 36 void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) { 37 std::unique_lock<std::mutex> LockGuard(Lock); 38 Fn(OS); 39 OS.flush(); 40 } 41 42 private: 43 std::mutex Lock; 44 raw_ostream &OS; 45 }; 46 47 class ResourceDirectoryCache { 48 public: 49 /// findResourceDir finds the resource directory relative to the clang 50 /// compiler being used in Args, by running it with "-print-resource-dir" 51 /// option and cache the results for reuse. \returns resource directory path 52 /// associated with the given invocation command or empty string if the 53 /// compiler path is NOT an absolute path. 54 StringRef findResourceDir(const tooling::CommandLineArguments &Args, 55 bool ClangCLMode) { 56 if (Args.size() < 1) 57 return ""; 58 59 const std::string &ClangBinaryPath = Args[0]; 60 if (!llvm::sys::path::is_absolute(ClangBinaryPath)) 61 return ""; 62 63 const std::string &ClangBinaryName = 64 std::string(llvm::sys::path::filename(ClangBinaryPath)); 65 66 std::unique_lock<std::mutex> LockGuard(CacheLock); 67 const auto &CachedResourceDir = Cache.find(ClangBinaryPath); 68 if (CachedResourceDir != Cache.end()) 69 return CachedResourceDir->second; 70 71 std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName}; 72 if (ClangCLMode) 73 PrintResourceDirArgs.push_back("/clang:-print-resource-dir"); 74 else 75 PrintResourceDirArgs.push_back("-print-resource-dir"); 76 77 llvm::SmallString<64> OutputFile, ErrorFile; 78 llvm::sys::fs::createTemporaryFile("print-resource-dir-output", 79 "" /*no-suffix*/, OutputFile); 80 llvm::sys::fs::createTemporaryFile("print-resource-dir-error", 81 "" /*no-suffix*/, ErrorFile); 82 llvm::FileRemover OutputRemover(OutputFile.c_str()); 83 llvm::FileRemover ErrorRemover(ErrorFile.c_str()); 84 llvm::Optional<StringRef> Redirects[] = { 85 {""}, // Stdin 86 OutputFile.str(), 87 ErrorFile.str(), 88 }; 89 if (const int RC = llvm::sys::ExecuteAndWait( 90 ClangBinaryPath, PrintResourceDirArgs, {}, Redirects)) { 91 auto ErrorBuf = llvm::MemoryBuffer::getFile(ErrorFile.c_str()); 92 llvm::errs() << ErrorBuf.get()->getBuffer(); 93 return ""; 94 } 95 96 auto OutputBuf = llvm::MemoryBuffer::getFile(OutputFile.c_str()); 97 if (!OutputBuf) 98 return ""; 99 StringRef Output = OutputBuf.get()->getBuffer().rtrim('\n'); 100 101 Cache[ClangBinaryPath] = Output.str(); 102 return Cache[ClangBinaryPath]; 103 } 104 105 private: 106 std::map<std::string, std::string> Cache; 107 std::mutex CacheLock; 108 }; 109 110 llvm::cl::opt<bool> Help("h", llvm::cl::desc("Alias for -help"), 111 llvm::cl::Hidden); 112 113 llvm::cl::OptionCategory DependencyScannerCategory("Tool options"); 114 115 static llvm::cl::opt<ScanningMode> ScanMode( 116 "mode", 117 llvm::cl::desc("The preprocessing mode used to compute the dependencies"), 118 llvm::cl::values( 119 clEnumValN(ScanningMode::MinimizedSourcePreprocessing, 120 "preprocess-minimized-sources", 121 "The set of dependencies is computed by preprocessing the " 122 "source files that were minimized to only include the " 123 "contents that might affect the dependencies"), 124 clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess", 125 "The set of dependencies is computed by preprocessing the " 126 "unmodified source files")), 127 llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing), 128 llvm::cl::cat(DependencyScannerCategory)); 129 130 static llvm::cl::opt<ScanningOutputFormat> Format( 131 "format", llvm::cl::desc("The output format for the dependencies"), 132 llvm::cl::values(clEnumValN(ScanningOutputFormat::Make, "make", 133 "Makefile compatible dep file"), 134 clEnumValN(ScanningOutputFormat::Full, "experimental-full", 135 "Full dependency graph suitable" 136 " for explicitly building modules. This format " 137 "is experimental and will change.")), 138 llvm::cl::init(ScanningOutputFormat::Make), 139 llvm::cl::cat(DependencyScannerCategory)); 140 141 // This mode is mostly useful for development of explicitly built modules. 142 // Command lines will contain arguments specifying modulemap file paths and 143 // absolute paths to PCM files in the module cache directory. 144 // 145 // Build tools that want to put the PCM files in a different location should use 146 // the C++ APIs instead, of which there are two flavors: 147 // 148 // 1. APIs that generate arguments with paths to modulemap and PCM files via 149 // callbacks provided by the client: 150 // * ModuleDeps::getCanonicalCommandLine(LookupPCMPath, LookupModuleDeps) 151 // * FullDependencies::getAdditionalArgs(LookupPCMPath, LookupModuleDeps) 152 // 153 // 2. APIs that don't generate arguments with paths to modulemap or PCM files 154 // and instead expect the client to append them manually after the fact: 155 // * ModuleDeps::getCanonicalCommandLineWithoutModulePaths() 156 // * FullDependencies::getAdditionalArgsWithoutModulePaths() 157 // 158 static llvm::cl::opt<bool> GenerateModulesPathArgs( 159 "generate-modules-path-args", 160 llvm::cl::desc( 161 "With '-format experimental-full', include arguments specifying " 162 "modules-related paths in the generated command lines: " 163 "'-fmodule-file=', '-o', '-fmodule-map-file='."), 164 llvm::cl::init(false), llvm::cl::cat(DependencyScannerCategory)); 165 166 static llvm::cl::opt<std::string> ModuleFilesDir( 167 "module-files-dir", 168 llvm::cl::desc("With '-generate-modules-path-args', paths to module files " 169 "in the generated command lines will begin with the " 170 "specified directory instead the module cache directory."), 171 llvm::cl::cat(DependencyScannerCategory)); 172 173 llvm::cl::opt<unsigned> 174 NumThreads("j", llvm::cl::Optional, 175 llvm::cl::desc("Number of worker threads to use (default: use " 176 "all concurrent threads)"), 177 llvm::cl::init(0), llvm::cl::cat(DependencyScannerCategory)); 178 179 llvm::cl::opt<std::string> 180 CompilationDB("compilation-database", 181 llvm::cl::desc("Compilation database"), llvm::cl::Required, 182 llvm::cl::cat(DependencyScannerCategory)); 183 184 llvm::cl::opt<bool> ReuseFileManager( 185 "reuse-filemanager", 186 llvm::cl::desc("Reuse the file manager and its cache between invocations."), 187 llvm::cl::init(true), llvm::cl::cat(DependencyScannerCategory)); 188 189 llvm::cl::opt<bool> SkipExcludedPPRanges( 190 "skip-excluded-pp-ranges", 191 llvm::cl::desc( 192 "Use the preprocessor optimization that skips excluded conditionals by " 193 "bumping the buffer pointer in the lexer instead of lexing the tokens " 194 "until reaching the end directive."), 195 llvm::cl::init(true), llvm::cl::cat(DependencyScannerCategory)); 196 197 llvm::cl::opt<bool> Verbose("v", llvm::cl::Optional, 198 llvm::cl::desc("Use verbose output."), 199 llvm::cl::init(false), 200 llvm::cl::cat(DependencyScannerCategory)); 201 202 } // end anonymous namespace 203 204 class SingleCommandCompilationDatabase : public tooling::CompilationDatabase { 205 public: 206 SingleCommandCompilationDatabase(tooling::CompileCommand Cmd) 207 : Command(std::move(Cmd)) {} 208 209 std::vector<tooling::CompileCommand> 210 getCompileCommands(StringRef FilePath) const override { 211 return {Command}; 212 } 213 214 std::vector<tooling::CompileCommand> getAllCompileCommands() const override { 215 return {Command}; 216 } 217 218 private: 219 tooling::CompileCommand Command; 220 }; 221 222 /// Takes the result of a dependency scan and prints error / dependency files 223 /// based on the result. 224 /// 225 /// \returns True on error. 226 static bool 227 handleMakeDependencyToolResult(const std::string &Input, 228 llvm::Expected<std::string> &MaybeFile, 229 SharedStream &OS, SharedStream &Errs) { 230 if (!MaybeFile) { 231 llvm::handleAllErrors( 232 MaybeFile.takeError(), [&Input, &Errs](llvm::StringError &Err) { 233 Errs.applyLocked([&](raw_ostream &OS) { 234 OS << "Error while scanning dependencies for " << Input << ":\n"; 235 OS << Err.getMessage(); 236 }); 237 }); 238 return true; 239 } 240 OS.applyLocked([&](raw_ostream &OS) { OS << *MaybeFile; }); 241 return false; 242 } 243 244 static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) { 245 std::vector<llvm::StringRef> Strings; 246 for (auto &&I : Set) 247 Strings.push_back(I.getKey()); 248 llvm::sort(Strings); 249 return llvm::json::Array(Strings); 250 } 251 252 static llvm::json::Array toJSONSorted(std::vector<ModuleID> V) { 253 llvm::sort(V, [](const ModuleID &A, const ModuleID &B) { 254 return std::tie(A.ModuleName, A.ContextHash) < 255 std::tie(B.ModuleName, B.ContextHash); 256 }); 257 258 llvm::json::Array Ret; 259 for (const ModuleID &MID : V) 260 Ret.push_back(llvm::json::Object( 261 {{"module-name", MID.ModuleName}, {"context-hash", MID.ContextHash}})); 262 return Ret; 263 } 264 265 // Thread safe. 266 class FullDeps { 267 public: 268 void mergeDeps(StringRef Input, FullDependenciesResult FDR, 269 size_t InputIndex) { 270 const FullDependencies &FD = FDR.FullDeps; 271 272 InputDeps ID; 273 ID.FileName = std::string(Input); 274 ID.ContextHash = std::move(FD.ID.ContextHash); 275 ID.FileDeps = std::move(FD.FileDeps); 276 ID.ModuleDeps = std::move(FD.ClangModuleDeps); 277 278 std::unique_lock<std::mutex> ul(Lock); 279 for (const ModuleDeps &MD : FDR.DiscoveredModules) { 280 auto I = Modules.find({MD.ID, 0}); 281 if (I != Modules.end()) { 282 I->first.InputIndex = std::min(I->first.InputIndex, InputIndex); 283 continue; 284 } 285 Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)}); 286 } 287 288 ID.AdditionalCommandLine = 289 GenerateModulesPathArgs 290 ? FD.getAdditionalArgs( 291 [&](ModuleID MID) { return lookupPCMPath(MID); }, 292 [&](ModuleID MID) -> const ModuleDeps & { 293 return lookupModuleDeps(MID); 294 }) 295 : FD.getAdditionalArgsWithoutModulePaths(); 296 297 Inputs.push_back(std::move(ID)); 298 } 299 300 void printFullOutput(raw_ostream &OS) { 301 // Sort the modules by name to get a deterministic order. 302 std::vector<IndexedModuleID> ModuleIDs; 303 for (auto &&M : Modules) 304 ModuleIDs.push_back(M.first); 305 llvm::sort(ModuleIDs, 306 [](const IndexedModuleID &A, const IndexedModuleID &B) { 307 return std::tie(A.ID.ModuleName, A.InputIndex) < 308 std::tie(B.ID.ModuleName, B.InputIndex); 309 }); 310 311 llvm::sort(Inputs, [](const InputDeps &A, const InputDeps &B) { 312 return A.FileName < B.FileName; 313 }); 314 315 using namespace llvm::json; 316 317 Array OutModules; 318 for (auto &&ModID : ModuleIDs) { 319 auto &MD = Modules[ModID]; 320 Object O{ 321 {"name", MD.ID.ModuleName}, 322 {"context-hash", MD.ID.ContextHash}, 323 {"file-deps", toJSONSorted(MD.FileDeps)}, 324 {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)}, 325 {"clang-modulemap-file", MD.ClangModuleMapFile}, 326 {"command-line", 327 GenerateModulesPathArgs 328 ? MD.getCanonicalCommandLine( 329 [&](ModuleID MID) { return lookupPCMPath(MID); }, 330 [&](ModuleID MID) -> const ModuleDeps & { 331 return lookupModuleDeps(MID); 332 }) 333 : MD.getCanonicalCommandLineWithoutModulePaths()}, 334 }; 335 OutModules.push_back(std::move(O)); 336 } 337 338 Array TUs; 339 for (auto &&I : Inputs) { 340 Object O{ 341 {"input-file", I.FileName}, 342 {"clang-context-hash", I.ContextHash}, 343 {"file-deps", I.FileDeps}, 344 {"clang-module-deps", toJSONSorted(I.ModuleDeps)}, 345 {"command-line", I.AdditionalCommandLine}, 346 }; 347 TUs.push_back(std::move(O)); 348 } 349 350 Object Output{ 351 {"modules", std::move(OutModules)}, 352 {"translation-units", std::move(TUs)}, 353 }; 354 355 OS << llvm::formatv("{0:2}\n", Value(std::move(Output))); 356 } 357 358 private: 359 StringRef lookupPCMPath(ModuleID MID) { 360 auto PCMPath = PCMPaths.insert({MID, ""}); 361 if (PCMPath.second) 362 PCMPath.first->second = constructPCMPath(lookupModuleDeps(MID)); 363 return PCMPath.first->second; 364 } 365 366 /// Construct a path for the explicitly built PCM. 367 std::string constructPCMPath(const ModuleDeps &MD) const { 368 StringRef Filename = llvm::sys::path::filename(MD.ImplicitModulePCMPath); 369 370 SmallString<256> ExplicitPCMPath( 371 !ModuleFilesDir.empty() 372 ? ModuleFilesDir 373 : MD.Invocation.getHeaderSearchOpts().ModuleCachePath); 374 llvm::sys::path::append(ExplicitPCMPath, MD.ID.ContextHash, Filename); 375 return std::string(ExplicitPCMPath); 376 } 377 378 const ModuleDeps &lookupModuleDeps(ModuleID MID) { 379 auto I = Modules.find(IndexedModuleID{MID, 0}); 380 assert(I != Modules.end()); 381 return I->second; 382 }; 383 384 struct IndexedModuleID { 385 ModuleID ID; 386 mutable size_t InputIndex; 387 388 bool operator==(const IndexedModuleID &Other) const { 389 return ID.ModuleName == Other.ID.ModuleName && 390 ID.ContextHash == Other.ID.ContextHash; 391 } 392 }; 393 394 struct IndexedModuleIDHasher { 395 std::size_t operator()(const IndexedModuleID &IMID) const { 396 using llvm::hash_combine; 397 398 return hash_combine(IMID.ID.ModuleName, IMID.ID.ContextHash); 399 } 400 }; 401 402 struct InputDeps { 403 std::string FileName; 404 std::string ContextHash; 405 std::vector<std::string> FileDeps; 406 std::vector<ModuleID> ModuleDeps; 407 std::vector<std::string> AdditionalCommandLine; 408 }; 409 410 std::mutex Lock; 411 std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleIDHasher> 412 Modules; 413 std::unordered_map<ModuleID, std::string, ModuleIDHasher> PCMPaths; 414 std::vector<InputDeps> Inputs; 415 }; 416 417 static bool handleFullDependencyToolResult( 418 const std::string &Input, 419 llvm::Expected<FullDependenciesResult> &MaybeFullDeps, FullDeps &FD, 420 size_t InputIndex, SharedStream &OS, SharedStream &Errs) { 421 if (!MaybeFullDeps) { 422 llvm::handleAllErrors( 423 MaybeFullDeps.takeError(), [&Input, &Errs](llvm::StringError &Err) { 424 Errs.applyLocked([&](raw_ostream &OS) { 425 OS << "Error while scanning dependencies for " << Input << ":\n"; 426 OS << Err.getMessage(); 427 }); 428 }); 429 return true; 430 } 431 FD.mergeDeps(Input, std::move(*MaybeFullDeps), InputIndex); 432 return false; 433 } 434 435 int main(int argc, const char **argv) { 436 llvm::InitLLVM X(argc, argv); 437 llvm::cl::HideUnrelatedOptions(DependencyScannerCategory); 438 if (!llvm::cl::ParseCommandLineOptions(argc, argv)) 439 return 1; 440 441 std::string ErrorMessage; 442 std::unique_ptr<tooling::JSONCompilationDatabase> Compilations = 443 tooling::JSONCompilationDatabase::loadFromFile( 444 CompilationDB, ErrorMessage, 445 tooling::JSONCommandLineSyntax::AutoDetect); 446 if (!Compilations) { 447 llvm::errs() << "error: " << ErrorMessage << "\n"; 448 return 1; 449 } 450 451 llvm::cl::PrintOptionValues(); 452 453 // The command options are rewritten to run Clang in preprocessor only mode. 454 auto AdjustingCompilations = 455 std::make_unique<tooling::ArgumentsAdjustingCompilations>( 456 std::move(Compilations)); 457 ResourceDirectoryCache ResourceDirCache; 458 459 AdjustingCompilations->appendArgumentsAdjuster( 460 [&ResourceDirCache](const tooling::CommandLineArguments &Args, 461 StringRef FileName) { 462 std::string LastO = ""; 463 bool HasResourceDir = false; 464 bool ClangCLMode = false; 465 auto FlagsEnd = llvm::find(Args, "--"); 466 if (FlagsEnd != Args.begin()) { 467 ClangCLMode = 468 llvm::sys::path::stem(Args[0]).contains_insensitive("clang-cl") || 469 llvm::is_contained(Args, "--driver-mode=cl"); 470 471 // Reverse scan, starting at the end or at the element before "--". 472 auto R = llvm::make_reverse_iterator(FlagsEnd); 473 for (auto I = R, E = Args.rend(); I != E; ++I) { 474 StringRef Arg = *I; 475 if (ClangCLMode) { 476 // Ignore arguments that are preceded by "-Xclang". 477 if ((I + 1) != E && I[1] == "-Xclang") 478 continue; 479 if (LastO.empty()) { 480 // With clang-cl, the output obj file can be specified with 481 // "/opath", "/o path", "/Fopath", and the dash counterparts. 482 // Also, clang-cl adds ".obj" extension if none is found. 483 if ((Arg == "-o" || Arg == "/o") && I != R) 484 LastO = I[-1]; // Next argument (reverse iterator) 485 else if (Arg.startswith("/Fo") || Arg.startswith("-Fo")) 486 LastO = Arg.drop_front(3).str(); 487 else if (Arg.startswith("/o") || Arg.startswith("-o")) 488 LastO = Arg.drop_front(2).str(); 489 490 if (!LastO.empty() && !llvm::sys::path::has_extension(LastO)) 491 LastO.append(".obj"); 492 } 493 } 494 if (Arg == "-resource-dir") 495 HasResourceDir = true; 496 } 497 } 498 tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd); 499 // The clang-cl driver passes "-o -" to the frontend. Inject the real 500 // file here to ensure "-MT" can be deduced if need be. 501 if (ClangCLMode && !LastO.empty()) { 502 AdjustedArgs.push_back("/clang:-o"); 503 AdjustedArgs.push_back("/clang:" + LastO); 504 } 505 506 if (!HasResourceDir) { 507 StringRef ResourceDir = 508 ResourceDirCache.findResourceDir(Args, ClangCLMode); 509 if (!ResourceDir.empty()) { 510 AdjustedArgs.push_back("-resource-dir"); 511 AdjustedArgs.push_back(std::string(ResourceDir)); 512 } 513 } 514 AdjustedArgs.insert(AdjustedArgs.end(), FlagsEnd, Args.end()); 515 return AdjustedArgs; 516 }); 517 518 SharedStream Errs(llvm::errs()); 519 // Print out the dependency results to STDOUT by default. 520 SharedStream DependencyOS(llvm::outs()); 521 522 DependencyScanningService Service(ScanMode, Format, ReuseFileManager, 523 SkipExcludedPPRanges); 524 llvm::ThreadPool Pool(llvm::hardware_concurrency(NumThreads)); 525 std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools; 526 for (unsigned I = 0; I < Pool.getThreadCount(); ++I) 527 WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service)); 528 529 std::vector<SingleCommandCompilationDatabase> Inputs; 530 for (tooling::CompileCommand Cmd : 531 AdjustingCompilations->getAllCompileCommands()) 532 Inputs.emplace_back(Cmd); 533 534 std::atomic<bool> HadErrors(false); 535 FullDeps FD; 536 std::mutex Lock; 537 size_t Index = 0; 538 539 if (Verbose) { 540 llvm::outs() << "Running clang-scan-deps on " << Inputs.size() 541 << " files using " << Pool.getThreadCount() << " workers\n"; 542 } 543 for (unsigned I = 0; I < Pool.getThreadCount(); ++I) { 544 Pool.async([I, &Lock, &Index, &Inputs, &HadErrors, &FD, &WorkerTools, 545 &DependencyOS, &Errs]() { 546 llvm::StringSet<> AlreadySeenModules; 547 while (true) { 548 const SingleCommandCompilationDatabase *Input; 549 std::string Filename; 550 std::string CWD; 551 size_t LocalIndex; 552 // Take the next input. 553 { 554 std::unique_lock<std::mutex> LockGuard(Lock); 555 if (Index >= Inputs.size()) 556 return; 557 LocalIndex = Index; 558 Input = &Inputs[Index++]; 559 tooling::CompileCommand Cmd = Input->getAllCompileCommands()[0]; 560 Filename = std::move(Cmd.Filename); 561 CWD = std::move(Cmd.Directory); 562 } 563 // Run the tool on it. 564 if (Format == ScanningOutputFormat::Make) { 565 auto MaybeFile = WorkerTools[I]->getDependencyFile(*Input, CWD); 566 if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS, 567 Errs)) 568 HadErrors = true; 569 } else { 570 auto MaybeFullDeps = WorkerTools[I]->getFullDependencies( 571 *Input, CWD, AlreadySeenModules); 572 if (handleFullDependencyToolResult(Filename, MaybeFullDeps, FD, 573 LocalIndex, DependencyOS, Errs)) 574 HadErrors = true; 575 } 576 } 577 }); 578 } 579 Pool.wait(); 580 581 if (Format == ScanningOutputFormat::Full) 582 FD.printFullOutput(llvm::outs()); 583 584 return HadErrors; 585 } 586