1 //===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Frontend/CompilerInstance.h"
10 #include "clang/Tooling/CommonOptionsParser.h"
11 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
12 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
14 #include "clang/Tooling/JSONCompilationDatabase.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/FileUtilities.h"
19 #include "llvm/Support/InitLLVM.h"
20 #include "llvm/Support/JSON.h"
21 #include "llvm/Support/Program.h"
22 #include "llvm/Support/Signals.h"
23 #include "llvm/Support/ThreadPool.h"
24 #include "llvm/Support/Threading.h"
25 #include <mutex>
26 #include <thread>
27 
28 using namespace clang;
29 using namespace tooling::dependencies;
30 
31 namespace {
32 
33 class SharedStream {
34 public:
35   SharedStream(raw_ostream &OS) : OS(OS) {}
36   void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) {
37     std::unique_lock<std::mutex> LockGuard(Lock);
38     Fn(OS);
39     OS.flush();
40   }
41 
42 private:
43   std::mutex Lock;
44   raw_ostream &OS;
45 };
46 
47 class ResourceDirectoryCache {
48 public:
49   /// findResourceDir finds the resource directory relative to the clang
50   /// compiler being used in Args, by running it with "-print-resource-dir"
51   /// option and cache the results for reuse. \returns resource directory path
52   /// associated with the given invocation command or empty string if the
53   /// compiler path is NOT an absolute path.
54   StringRef findResourceDir(const tooling::CommandLineArguments &Args,
55                             bool ClangCLMode) {
56     if (Args.size() < 1)
57       return "";
58 
59     const std::string &ClangBinaryPath = Args[0];
60     if (!llvm::sys::path::is_absolute(ClangBinaryPath))
61       return "";
62 
63     const std::string &ClangBinaryName =
64         std::string(llvm::sys::path::filename(ClangBinaryPath));
65 
66     std::unique_lock<std::mutex> LockGuard(CacheLock);
67     const auto &CachedResourceDir = Cache.find(ClangBinaryPath);
68     if (CachedResourceDir != Cache.end())
69       return CachedResourceDir->second;
70 
71     std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName};
72     if (ClangCLMode)
73       PrintResourceDirArgs.push_back("/clang:-print-resource-dir");
74     else
75       PrintResourceDirArgs.push_back("-print-resource-dir");
76 
77     llvm::SmallString<64> OutputFile, ErrorFile;
78     llvm::sys::fs::createTemporaryFile("print-resource-dir-output",
79                                        "" /*no-suffix*/, OutputFile);
80     llvm::sys::fs::createTemporaryFile("print-resource-dir-error",
81                                        "" /*no-suffix*/, ErrorFile);
82     llvm::FileRemover OutputRemover(OutputFile.c_str());
83     llvm::FileRemover ErrorRemover(ErrorFile.c_str());
84     llvm::Optional<StringRef> Redirects[] = {
85         {""}, // Stdin
86         OutputFile.str(),
87         ErrorFile.str(),
88     };
89     if (const int RC = llvm::sys::ExecuteAndWait(
90             ClangBinaryPath, PrintResourceDirArgs, {}, Redirects)) {
91       auto ErrorBuf = llvm::MemoryBuffer::getFile(ErrorFile.c_str());
92       llvm::errs() << ErrorBuf.get()->getBuffer();
93       return "";
94     }
95 
96     auto OutputBuf = llvm::MemoryBuffer::getFile(OutputFile.c_str());
97     if (!OutputBuf)
98       return "";
99     StringRef Output = OutputBuf.get()->getBuffer().rtrim('\n');
100 
101     Cache[ClangBinaryPath] = Output.str();
102     return Cache[ClangBinaryPath];
103   }
104 
105 private:
106   std::map<std::string, std::string> Cache;
107   std::mutex CacheLock;
108 };
109 
// Hidden "-h" flag whose description marks it as an alias for -help. Its value
// is not read anywhere in the code visible in this file.
llvm::cl::opt<bool> Help("h", llvm::cl::desc("Alias for -help"),
                         llvm::cl::Hidden);

// Category that all of this tool's own options are registered under. main()
// passes it to llvm::cl::HideUnrelatedOptions().
llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
114 
// "-mode": selects the ScanningMode that main() hands to
// DependencyScanningService. Defaults to
// ScanningMode::MinimizedSourcePreprocessing.
static llvm::cl::opt<ScanningMode> ScanMode(
    "mode",
    llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
    llvm::cl::values(
        clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
                   "preprocess-minimized-sources",
                   "The set of dependencies is computed by preprocessing the "
                   "source files that were minimized to only include the "
                   "contents that might affect the dependencies"),
        clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
                   "The set of dependencies is computed by preprocessing the "
                   "unmodified source files")),
    llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing),
    llvm::cl::cat(DependencyScannerCategory));
129 
// "-format": selects the ScanningOutputFormat. In main(), the Make format
// prints each result as soon as it is produced, while the Full format merges
// all results and prints one JSON document after every worker has finished.
// Defaults to ScanningOutputFormat::Make.
static llvm::cl::opt<ScanningOutputFormat> Format(
    "format", llvm::cl::desc("The output format for the dependencies"),
    llvm::cl::values(clEnumValN(ScanningOutputFormat::Make, "make",
                                "Makefile compatible dep file"),
                     clEnumValN(ScanningOutputFormat::Full, "experimental-full",
                                "Full dependency graph suitable"
                                " for explicitly building modules. This format "
                                "is experimental and will change.")),
    llvm::cl::init(ScanningOutputFormat::Make),
    llvm::cl::cat(DependencyScannerCategory));
140 
// This mode is mostly useful for development of explicitly built modules.
// Command lines will contain arguments specifying modulemap file paths and
// absolute paths to PCM files in the module cache directory.
//
// Build tools that want to put the PCM files in a different location should use
// the C++ APIs instead, of which there are two flavors:
//
// 1. APIs that generate arguments with paths to modulemap and PCM files via
//    callbacks provided by the client:
//     * ModuleDeps::getCanonicalCommandLine(LookupPCMPath, LookupModuleDeps)
//     * FullDependencies::getAdditionalArgs(LookupPCMPath, LookupModuleDeps)
//
// 2. APIs that don't generate arguments with paths to modulemap or PCM files
//    and instead expect the client to append them manually after the fact:
//     * ModuleDeps::getCanonicalCommandLineWithoutModulePaths()
//     * FullDependencies::getAdditionalArgsWithoutModulePaths()
//
// The flag defaults to false. FullDeps reads it to pick flavor 1 (flag set) or
// flavor 2 (flag unset) when it builds command lines.
static llvm::cl::opt<bool> GenerateModulesPathArgs(
    "generate-modules-path-args",
    llvm::cl::desc(
        "With '-format experimental-full', include arguments specifying "
        "modules-related paths in the generated command lines: "
        "'-fmodule-file=', '-o', '-fmodule-map-file='."),
    llvm::cl::init(false), llvm::cl::cat(DependencyScannerCategory));
165 
166 static llvm::cl::opt<std::string> ModuleFilesDir(
167     "module-files-dir",
168     llvm::cl::desc("With '-generate-modules-path-args', paths to module files "
169                    "in the generated command lines will begin with the "
170                    "specified directory instead the module cache directory."),
171     llvm::cl::cat(DependencyScannerCategory));
172 
// "-j": worker thread count. main() forwards the value to
// llvm::hardware_concurrency() when it sizes the thread pool. Defaults to 0,
// which the description below documents as "use all concurrent threads".
llvm::cl::opt<unsigned>
    NumThreads("j", llvm::cl::Optional,
               llvm::cl::desc("Number of worker threads to use (default: use "
                              "all concurrent threads)"),
               llvm::cl::init(0), llvm::cl::cat(DependencyScannerCategory));
178 
// "-compilation-database" (required): the path main() passes to
// tooling::JSONCompilationDatabase::loadFromFile().
llvm::cl::opt<std::string>
    CompilationDB("compilation-database",
                  llvm::cl::desc("Compilation database"), llvm::cl::Required,
                  llvm::cl::cat(DependencyScannerCategory));
183 
// "-reuse-filemanager": forwarded by main() to the DependencyScanningService
// constructor. Defaults to true.
llvm::cl::opt<bool> ReuseFileManager(
    "reuse-filemanager",
    llvm::cl::desc("Reuse the file manager and its cache between invocations."),
    llvm::cl::init(true), llvm::cl::cat(DependencyScannerCategory));
188 
189 llvm::cl::opt<bool> SkipExcludedPPRanges(
190     "skip-excluded-pp-ranges",
191     llvm::cl::desc(
192         "Use the preprocessor optimization that skips excluded conditionals by "
193         "bumping the buffer pointer in the lexer instead of lexing the tokens  "
194         "until reaching the end directive."),
195     llvm::cl::init(true), llvm::cl::cat(DependencyScannerCategory));
196 
// "-v": when set, main() prints the number of inputs and the number of workers
// before scanning starts. Defaults to false.
llvm::cl::opt<bool> Verbose("v", llvm::cl::Optional,
                            llvm::cl::desc("Use verbose output."),
                            llvm::cl::init(false),
                            llvm::cl::cat(DependencyScannerCategory));
201 
202 } // end anonymous namespace
203 
204 class SingleCommandCompilationDatabase : public tooling::CompilationDatabase {
205 public:
206   SingleCommandCompilationDatabase(tooling::CompileCommand Cmd)
207       : Command(std::move(Cmd)) {}
208 
209   std::vector<tooling::CompileCommand>
210   getCompileCommands(StringRef FilePath) const override {
211     return {Command};
212   }
213 
214   std::vector<tooling::CompileCommand> getAllCompileCommands() const override {
215     return {Command};
216   }
217 
218 private:
219   tooling::CompileCommand Command;
220 };
221 
222 /// Takes the result of a dependency scan and prints error / dependency files
223 /// based on the result.
224 ///
225 /// \returns True on error.
226 static bool
227 handleMakeDependencyToolResult(const std::string &Input,
228                                llvm::Expected<std::string> &MaybeFile,
229                                SharedStream &OS, SharedStream &Errs) {
230   if (!MaybeFile) {
231     llvm::handleAllErrors(
232         MaybeFile.takeError(), [&Input, &Errs](llvm::StringError &Err) {
233           Errs.applyLocked([&](raw_ostream &OS) {
234             OS << "Error while scanning dependencies for " << Input << ":\n";
235             OS << Err.getMessage();
236           });
237         });
238     return true;
239   }
240   OS.applyLocked([&](raw_ostream &OS) { OS << *MaybeFile; });
241   return false;
242 }
243 
244 static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) {
245   std::vector<llvm::StringRef> Strings;
246   for (auto &&I : Set)
247     Strings.push_back(I.getKey());
248   llvm::sort(Strings);
249   return llvm::json::Array(Strings);
250 }
251 
252 static llvm::json::Array toJSONSorted(std::vector<ModuleID> V) {
253   llvm::sort(V, [](const ModuleID &A, const ModuleID &B) {
254     return std::tie(A.ModuleName, A.ContextHash) <
255            std::tie(B.ModuleName, B.ContextHash);
256   });
257 
258   llvm::json::Array Ret;
259   for (const ModuleID &MID : V)
260     Ret.push_back(llvm::json::Object(
261         {{"module-name", MID.ModuleName}, {"context-hash", MID.ContextHash}}));
262   return Ret;
263 }
264 
265 // Thread safe.
266 class FullDeps {
267 public:
268   void mergeDeps(StringRef Input, FullDependenciesResult FDR,
269                  size_t InputIndex) {
270     const FullDependencies &FD = FDR.FullDeps;
271 
272     InputDeps ID;
273     ID.FileName = std::string(Input);
274     ID.ContextHash = std::move(FD.ID.ContextHash);
275     ID.FileDeps = std::move(FD.FileDeps);
276     ID.ModuleDeps = std::move(FD.ClangModuleDeps);
277 
278     std::unique_lock<std::mutex> ul(Lock);
279     for (const ModuleDeps &MD : FDR.DiscoveredModules) {
280       auto I = Modules.find({MD.ID, 0});
281       if (I != Modules.end()) {
282         I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
283         continue;
284       }
285       Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)});
286     }
287 
288     ID.AdditionalCommandLine =
289         GenerateModulesPathArgs
290             ? FD.getAdditionalArgs(
291                   [&](ModuleID MID) { return lookupPCMPath(MID); },
292                   [&](ModuleID MID) -> const ModuleDeps & {
293                     return lookupModuleDeps(MID);
294                   })
295             : FD.getAdditionalArgsWithoutModulePaths();
296 
297     Inputs.push_back(std::move(ID));
298   }
299 
300   void printFullOutput(raw_ostream &OS) {
301     // Sort the modules by name to get a deterministic order.
302     std::vector<IndexedModuleID> ModuleIDs;
303     for (auto &&M : Modules)
304       ModuleIDs.push_back(M.first);
305     llvm::sort(ModuleIDs,
306                [](const IndexedModuleID &A, const IndexedModuleID &B) {
307                  return std::tie(A.ID.ModuleName, A.InputIndex) <
308                         std::tie(B.ID.ModuleName, B.InputIndex);
309                });
310 
311     llvm::sort(Inputs, [](const InputDeps &A, const InputDeps &B) {
312       return A.FileName < B.FileName;
313     });
314 
315     using namespace llvm::json;
316 
317     Array OutModules;
318     for (auto &&ModID : ModuleIDs) {
319       auto &MD = Modules[ModID];
320       Object O{
321           {"name", MD.ID.ModuleName},
322           {"context-hash", MD.ID.ContextHash},
323           {"file-deps", toJSONSorted(MD.FileDeps)},
324           {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)},
325           {"clang-modulemap-file", MD.ClangModuleMapFile},
326           {"command-line",
327            GenerateModulesPathArgs
328                ? MD.getCanonicalCommandLine(
329                      [&](ModuleID MID) { return lookupPCMPath(MID); },
330                      [&](ModuleID MID) -> const ModuleDeps & {
331                        return lookupModuleDeps(MID);
332                      })
333                : MD.getCanonicalCommandLineWithoutModulePaths()},
334       };
335       OutModules.push_back(std::move(O));
336     }
337 
338     Array TUs;
339     for (auto &&I : Inputs) {
340       Object O{
341           {"input-file", I.FileName},
342           {"clang-context-hash", I.ContextHash},
343           {"file-deps", I.FileDeps},
344           {"clang-module-deps", toJSONSorted(I.ModuleDeps)},
345           {"command-line", I.AdditionalCommandLine},
346       };
347       TUs.push_back(std::move(O));
348     }
349 
350     Object Output{
351         {"modules", std::move(OutModules)},
352         {"translation-units", std::move(TUs)},
353     };
354 
355     OS << llvm::formatv("{0:2}\n", Value(std::move(Output)));
356   }
357 
358 private:
359   StringRef lookupPCMPath(ModuleID MID) {
360     auto PCMPath = PCMPaths.insert({MID, ""});
361     if (PCMPath.second)
362       PCMPath.first->second = constructPCMPath(lookupModuleDeps(MID));
363     return PCMPath.first->second;
364   }
365 
366   /// Construct a path for the explicitly built PCM.
367   std::string constructPCMPath(const ModuleDeps &MD) const {
368     StringRef Filename = llvm::sys::path::filename(MD.ImplicitModulePCMPath);
369 
370     SmallString<256> ExplicitPCMPath(
371         !ModuleFilesDir.empty()
372             ? ModuleFilesDir
373             : MD.Invocation.getHeaderSearchOpts().ModuleCachePath);
374     llvm::sys::path::append(ExplicitPCMPath, MD.ID.ContextHash, Filename);
375     return std::string(ExplicitPCMPath);
376   }
377 
378   const ModuleDeps &lookupModuleDeps(ModuleID MID) {
379     auto I = Modules.find(IndexedModuleID{MID, 0});
380     assert(I != Modules.end());
381     return I->second;
382   };
383 
384   struct IndexedModuleID {
385     ModuleID ID;
386     mutable size_t InputIndex;
387 
388     bool operator==(const IndexedModuleID &Other) const {
389       return ID.ModuleName == Other.ID.ModuleName &&
390              ID.ContextHash == Other.ID.ContextHash;
391     }
392   };
393 
394   struct IndexedModuleIDHasher {
395     std::size_t operator()(const IndexedModuleID &IMID) const {
396       using llvm::hash_combine;
397 
398       return hash_combine(IMID.ID.ModuleName, IMID.ID.ContextHash);
399     }
400   };
401 
402   struct InputDeps {
403     std::string FileName;
404     std::string ContextHash;
405     std::vector<std::string> FileDeps;
406     std::vector<ModuleID> ModuleDeps;
407     std::vector<std::string> AdditionalCommandLine;
408   };
409 
410   std::mutex Lock;
411   std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleIDHasher>
412       Modules;
413   std::unordered_map<ModuleID, std::string, ModuleIDHasher> PCMPaths;
414   std::vector<InputDeps> Inputs;
415 };
416 
417 static bool handleFullDependencyToolResult(
418     const std::string &Input,
419     llvm::Expected<FullDependenciesResult> &MaybeFullDeps, FullDeps &FD,
420     size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
421   if (!MaybeFullDeps) {
422     llvm::handleAllErrors(
423         MaybeFullDeps.takeError(), [&Input, &Errs](llvm::StringError &Err) {
424           Errs.applyLocked([&](raw_ostream &OS) {
425             OS << "Error while scanning dependencies for " << Input << ":\n";
426             OS << Err.getMessage();
427           });
428         });
429     return true;
430   }
431   FD.mergeDeps(Input, std::move(*MaybeFullDeps), InputIndex);
432   return false;
433 }
434 
435 int main(int argc, const char **argv) {
436   llvm::InitLLVM X(argc, argv);
437   llvm::cl::HideUnrelatedOptions(DependencyScannerCategory);
438   if (!llvm::cl::ParseCommandLineOptions(argc, argv))
439     return 1;
440 
441   std::string ErrorMessage;
442   std::unique_ptr<tooling::JSONCompilationDatabase> Compilations =
443       tooling::JSONCompilationDatabase::loadFromFile(
444           CompilationDB, ErrorMessage,
445           tooling::JSONCommandLineSyntax::AutoDetect);
446   if (!Compilations) {
447     llvm::errs() << "error: " << ErrorMessage << "\n";
448     return 1;
449   }
450 
451   llvm::cl::PrintOptionValues();
452 
453   // The command options are rewritten to run Clang in preprocessor only mode.
454   auto AdjustingCompilations =
455       std::make_unique<tooling::ArgumentsAdjustingCompilations>(
456           std::move(Compilations));
457   ResourceDirectoryCache ResourceDirCache;
458 
459   AdjustingCompilations->appendArgumentsAdjuster(
460       [&ResourceDirCache](const tooling::CommandLineArguments &Args,
461                           StringRef FileName) {
462         std::string LastO = "";
463         bool HasResourceDir = false;
464         bool ClangCLMode = false;
465         auto FlagsEnd = llvm::find(Args, "--");
466         if (FlagsEnd != Args.begin()) {
467           ClangCLMode =
468               llvm::sys::path::stem(Args[0]).contains_insensitive("clang-cl") ||
469               llvm::is_contained(Args, "--driver-mode=cl");
470 
471           // Reverse scan, starting at the end or at the element before "--".
472           auto R = llvm::make_reverse_iterator(FlagsEnd);
473           for (auto I = R, E = Args.rend(); I != E; ++I) {
474             StringRef Arg = *I;
475             if (ClangCLMode) {
476               // Ignore arguments that are preceded by "-Xclang".
477               if ((I + 1) != E && I[1] == "-Xclang")
478                 continue;
479               if (LastO.empty()) {
480                 // With clang-cl, the output obj file can be specified with
481                 // "/opath", "/o path", "/Fopath", and the dash counterparts.
482                 // Also, clang-cl adds ".obj" extension if none is found.
483                 if ((Arg == "-o" || Arg == "/o") && I != R)
484                   LastO = I[-1]; // Next argument (reverse iterator)
485                 else if (Arg.startswith("/Fo") || Arg.startswith("-Fo"))
486                   LastO = Arg.drop_front(3).str();
487                 else if (Arg.startswith("/o") || Arg.startswith("-o"))
488                   LastO = Arg.drop_front(2).str();
489 
490                 if (!LastO.empty() && !llvm::sys::path::has_extension(LastO))
491                   LastO.append(".obj");
492               }
493             }
494             if (Arg == "-resource-dir")
495               HasResourceDir = true;
496           }
497         }
498         tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd);
499         // The clang-cl driver passes "-o -" to the frontend. Inject the real
500         // file here to ensure "-MT" can be deduced if need be.
501         if (ClangCLMode && !LastO.empty()) {
502           AdjustedArgs.push_back("/clang:-o");
503           AdjustedArgs.push_back("/clang:" + LastO);
504         }
505 
506         if (!HasResourceDir) {
507           StringRef ResourceDir =
508               ResourceDirCache.findResourceDir(Args, ClangCLMode);
509           if (!ResourceDir.empty()) {
510             AdjustedArgs.push_back("-resource-dir");
511             AdjustedArgs.push_back(std::string(ResourceDir));
512           }
513         }
514         AdjustedArgs.insert(AdjustedArgs.end(), FlagsEnd, Args.end());
515         return AdjustedArgs;
516       });
517 
518   SharedStream Errs(llvm::errs());
519   // Print out the dependency results to STDOUT by default.
520   SharedStream DependencyOS(llvm::outs());
521 
522   DependencyScanningService Service(ScanMode, Format, ReuseFileManager,
523                                     SkipExcludedPPRanges);
524   llvm::ThreadPool Pool(llvm::hardware_concurrency(NumThreads));
525   std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
526   for (unsigned I = 0; I < Pool.getThreadCount(); ++I)
527     WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
528 
529   std::vector<SingleCommandCompilationDatabase> Inputs;
530   for (tooling::CompileCommand Cmd :
531        AdjustingCompilations->getAllCompileCommands())
532     Inputs.emplace_back(Cmd);
533 
534   std::atomic<bool> HadErrors(false);
535   FullDeps FD;
536   std::mutex Lock;
537   size_t Index = 0;
538 
539   if (Verbose) {
540     llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
541                  << " files using " << Pool.getThreadCount() << " workers\n";
542   }
543   for (unsigned I = 0; I < Pool.getThreadCount(); ++I) {
544     Pool.async([I, &Lock, &Index, &Inputs, &HadErrors, &FD, &WorkerTools,
545                 &DependencyOS, &Errs]() {
546       llvm::StringSet<> AlreadySeenModules;
547       while (true) {
548         const SingleCommandCompilationDatabase *Input;
549         std::string Filename;
550         std::string CWD;
551         size_t LocalIndex;
552         // Take the next input.
553         {
554           std::unique_lock<std::mutex> LockGuard(Lock);
555           if (Index >= Inputs.size())
556             return;
557           LocalIndex = Index;
558           Input = &Inputs[Index++];
559           tooling::CompileCommand Cmd = Input->getAllCompileCommands()[0];
560           Filename = std::move(Cmd.Filename);
561           CWD = std::move(Cmd.Directory);
562         }
563         // Run the tool on it.
564         if (Format == ScanningOutputFormat::Make) {
565           auto MaybeFile = WorkerTools[I]->getDependencyFile(*Input, CWD);
566           if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
567                                              Errs))
568             HadErrors = true;
569         } else {
570           auto MaybeFullDeps = WorkerTools[I]->getFullDependencies(
571               *Input, CWD, AlreadySeenModules);
572           if (handleFullDependencyToolResult(Filename, MaybeFullDeps, FD,
573                                              LocalIndex, DependencyOS, Errs))
574             HadErrors = true;
575         }
576       }
577     });
578   }
579   Pool.wait();
580 
581   if (Format == ScanningOutputFormat::Full)
582     FD.printFullOutput(llvm::outs());
583 
584   return HadErrors;
585 }
586