1 //===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Frontend/CompilerInstance.h"
10 #include "clang/Tooling/CommonOptionsParser.h"
11 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
12 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
13 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
14 #include "clang/Tooling/JSONCompilationDatabase.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/FileUtilities.h"
17 #include "llvm/Support/InitLLVM.h"
18 #include "llvm/Support/JSON.h"
19 #include "llvm/Support/Program.h"
20 #include "llvm/Support/Signals.h"
21 #include "llvm/Support/ThreadPool.h"
22 #include "llvm/Support/Threading.h"
23 #include <mutex>
24 #include <thread>
25 
26 using namespace clang;
27 using namespace tooling::dependencies;
28 
29 namespace {
30 
31 class SharedStream {
32 public:
SharedStream(raw_ostream & OS)33   SharedStream(raw_ostream &OS) : OS(OS) {}
applyLocked(llvm::function_ref<void (raw_ostream & OS)> Fn)34   void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) {
35     std::unique_lock<std::mutex> LockGuard(Lock);
36     Fn(OS);
37     OS.flush();
38   }
39 
40 private:
41   std::mutex Lock;
42   raw_ostream &OS;
43 };
44 
45 class ResourceDirectoryCache {
46 public:
47   /// findResourceDir finds the resource directory relative to the clang
48   /// compiler being used in Args, by running it with "-print-resource-dir"
49   /// option and cache the results for reuse. \returns resource directory path
50   /// associated with the given invocation command or empty string if the
51   /// compiler path is NOT an absolute path.
findResourceDir(const tooling::CommandLineArguments & Args)52   StringRef findResourceDir(const tooling::CommandLineArguments &Args) {
53     if (Args.size() < 1)
54       return "";
55 
56     const std::string &ClangBinaryPath = Args[0];
57     if (!llvm::sys::path::is_absolute(ClangBinaryPath))
58       return "";
59 
60     const std::string &ClangBinaryName =
61         std::string(llvm::sys::path::filename(ClangBinaryPath));
62 
63     std::unique_lock<std::mutex> LockGuard(CacheLock);
64     const auto &CachedResourceDir = Cache.find(ClangBinaryPath);
65     if (CachedResourceDir != Cache.end())
66       return CachedResourceDir->second;
67 
68     std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName,
69                                                 "-print-resource-dir"};
70     llvm::SmallString<64> OutputFile, ErrorFile;
71     llvm::sys::fs::createTemporaryFile("print-resource-dir-output",
72                                        "" /*no-suffix*/, OutputFile);
73     llvm::sys::fs::createTemporaryFile("print-resource-dir-error",
74                                        "" /*no-suffix*/, ErrorFile);
75     llvm::FileRemover OutputRemover(OutputFile.c_str());
76     llvm::FileRemover ErrorRemover(ErrorFile.c_str());
77     llvm::Optional<StringRef> Redirects[] = {
78         {""}, // Stdin
79         StringRef(OutputFile),
80         StringRef(ErrorFile),
81     };
82     if (const int RC = llvm::sys::ExecuteAndWait(
83             ClangBinaryPath, PrintResourceDirArgs, {}, Redirects)) {
84       auto ErrorBuf = llvm::MemoryBuffer::getFile(ErrorFile.c_str());
85       llvm::errs() << ErrorBuf.get()->getBuffer();
86       return "";
87     }
88 
89     auto OutputBuf = llvm::MemoryBuffer::getFile(OutputFile.c_str());
90     if (!OutputBuf)
91       return "";
92     StringRef Output = OutputBuf.get()->getBuffer().rtrim('\n');
93 
94     Cache[ClangBinaryPath] = Output.str();
95     return Cache[ClangBinaryPath];
96   }
97 
98 private:
99   std::map<std::string, std::string> Cache;
100   std::mutex CacheLock;
101 };
102 
103 llvm::cl::opt<bool> Help("h", llvm::cl::desc("Alias for -help"),
104                          llvm::cl::Hidden);
105 
106 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
107 
108 static llvm::cl::opt<ScanningMode> ScanMode(
109     "mode",
110     llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
111     llvm::cl::values(
112         clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
113                    "preprocess-minimized-sources",
114                    "The set of dependencies is computed by preprocessing the "
115                    "source files that were minimized to only include the "
116                    "contents that might affect the dependencies"),
117         clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
118                    "The set of dependencies is computed by preprocessing the "
119                    "unmodified source files")),
120     llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing),
121     llvm::cl::cat(DependencyScannerCategory));
122 
123 static llvm::cl::opt<ScanningOutputFormat> Format(
124     "format", llvm::cl::desc("The output format for the dependencies"),
125     llvm::cl::values(clEnumValN(ScanningOutputFormat::Make, "make",
126                                 "Makefile compatible dep file"),
127                      clEnumValN(ScanningOutputFormat::Full, "experimental-full",
128                                 "Full dependency graph suitable"
129                                 " for explicitly building modules. This format "
130                                 "is experimental and will change.")),
131     llvm::cl::init(ScanningOutputFormat::Make),
132     llvm::cl::cat(DependencyScannerCategory));
133 
134 static llvm::cl::opt<bool> FullCommandLine(
135     "full-command-line",
136     llvm::cl::desc("Include the full command lines to use to build modules"),
137     llvm::cl::init(false), llvm::cl::cat(DependencyScannerCategory));
138 
139 llvm::cl::opt<unsigned>
140     NumThreads("j", llvm::cl::Optional,
141                llvm::cl::desc("Number of worker threads to use (default: use "
142                               "all concurrent threads)"),
143                llvm::cl::init(0), llvm::cl::cat(DependencyScannerCategory));
144 
145 llvm::cl::opt<std::string>
146     CompilationDB("compilation-database",
147                   llvm::cl::desc("Compilation database"), llvm::cl::Required,
148                   llvm::cl::cat(DependencyScannerCategory));
149 
150 llvm::cl::opt<bool> ReuseFileManager(
151     "reuse-filemanager",
152     llvm::cl::desc("Reuse the file manager and its cache between invocations."),
153     llvm::cl::init(true), llvm::cl::cat(DependencyScannerCategory));
154 
155 llvm::cl::opt<bool> SkipExcludedPPRanges(
156     "skip-excluded-pp-ranges",
157     llvm::cl::desc(
158         "Use the preprocessor optimization that skips excluded conditionals by "
159         "bumping the buffer pointer in the lexer instead of lexing the tokens  "
160         "until reaching the end directive."),
161     llvm::cl::init(true), llvm::cl::cat(DependencyScannerCategory));
162 
163 llvm::cl::opt<bool> Verbose("v", llvm::cl::Optional,
164                             llvm::cl::desc("Use verbose output."),
165                             llvm::cl::init(false),
166                             llvm::cl::cat(DependencyScannerCategory));
167 
168 } // end anonymous namespace
169 
170 /// \returns object-file path derived from source-file path.
getObjFilePath(StringRef SrcFile)171 static std::string getObjFilePath(StringRef SrcFile) {
172   SmallString<128> ObjFileName(SrcFile);
173   llvm::sys::path::replace_extension(ObjFileName, "o");
174   return std::string(ObjFileName.str());
175 }
176 
177 class SingleCommandCompilationDatabase : public tooling::CompilationDatabase {
178 public:
SingleCommandCompilationDatabase(tooling::CompileCommand Cmd)179   SingleCommandCompilationDatabase(tooling::CompileCommand Cmd)
180       : Command(std::move(Cmd)) {}
181 
182   std::vector<tooling::CompileCommand>
getCompileCommands(StringRef FilePath) const183   getCompileCommands(StringRef FilePath) const override {
184     return {Command};
185   }
186 
getAllCompileCommands() const187   std::vector<tooling::CompileCommand> getAllCompileCommands() const override {
188     return {Command};
189   }
190 
191 private:
192   tooling::CompileCommand Command;
193 };
194 
195 /// Takes the result of a dependency scan and prints error / dependency files
196 /// based on the result.
197 ///
198 /// \returns True on error.
199 static bool
handleMakeDependencyToolResult(const std::string & Input,llvm::Expected<std::string> & MaybeFile,SharedStream & OS,SharedStream & Errs)200 handleMakeDependencyToolResult(const std::string &Input,
201                                llvm::Expected<std::string> &MaybeFile,
202                                SharedStream &OS, SharedStream &Errs) {
203   if (!MaybeFile) {
204     llvm::handleAllErrors(
205         MaybeFile.takeError(), [&Input, &Errs](llvm::StringError &Err) {
206           Errs.applyLocked([&](raw_ostream &OS) {
207             OS << "Error while scanning dependencies for " << Input << ":\n";
208             OS << Err.getMessage();
209           });
210         });
211     return true;
212   }
213   OS.applyLocked([&](raw_ostream &OS) { OS << *MaybeFile; });
214   return false;
215 }
216 
/// Converts the keys of \p Set into a JSON array in sorted order.
static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) {
  std::vector<llvm::StringRef> Keys;
  Keys.reserve(Set.size());
  for (const auto &Entry : Set)
    Keys.push_back(Entry.getKey());
  llvm::sort(Keys.begin(), Keys.end());
  return llvm::json::Array(Keys);
}
224 
toJSONSorted(std::vector<ClangModuleDep> V)225 static llvm::json::Array toJSONSorted(std::vector<ClangModuleDep> V) {
226   llvm::sort(V, [](const ClangModuleDep &A, const ClangModuleDep &B) {
227     return std::tie(A.ModuleName, A.ContextHash) <
228            std::tie(B.ModuleName, B.ContextHash);
229   });
230 
231   llvm::json::Array Ret;
232   for (const ClangModuleDep &CMD : V)
233     Ret.push_back(llvm::json::Object(
234         {{"module-name", CMD.ModuleName}, {"context-hash", CMD.ContextHash}}));
235   return Ret;
236 }
237 
238 // Thread safe.
239 class FullDeps {
240 public:
mergeDeps(StringRef Input,FullDependenciesResult FDR,size_t InputIndex)241   void mergeDeps(StringRef Input, FullDependenciesResult FDR,
242                  size_t InputIndex) {
243     const FullDependencies &FD = FDR.FullDeps;
244 
245     InputDeps ID;
246     ID.FileName = std::string(Input);
247     ID.ContextHash = std::move(FD.ContextHash);
248     ID.FileDeps = std::move(FD.FileDeps);
249     ID.ModuleDeps = std::move(FD.ClangModuleDeps);
250 
251     std::unique_lock<std::mutex> ul(Lock);
252     for (const ModuleDeps &MD : FDR.DiscoveredModules) {
253       auto I = Modules.find({MD.ContextHash, MD.ModuleName, 0});
254       if (I != Modules.end()) {
255         I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
256         continue;
257       }
258       Modules.insert(
259           I, {{MD.ContextHash, MD.ModuleName, InputIndex}, std::move(MD)});
260     }
261 
262     if (FullCommandLine)
263       ID.AdditonalCommandLine = FD.getAdditionalCommandLine(
264           [&](ClangModuleDep CMD) { return lookupPCMPath(CMD); },
265           [&](ClangModuleDep CMD) -> const ModuleDeps & {
266             return lookupModuleDeps(CMD);
267           });
268 
269     Inputs.push_back(std::move(ID));
270   }
271 
printFullOutput(raw_ostream & OS)272   void printFullOutput(raw_ostream &OS) {
273     // Sort the modules by name to get a deterministic order.
274     std::vector<ContextModulePair> ModuleNames;
275     for (auto &&M : Modules)
276       ModuleNames.push_back(M.first);
277     llvm::sort(ModuleNames,
278                [](const ContextModulePair &A, const ContextModulePair &B) {
279                  return std::tie(A.ModuleName, A.InputIndex) <
280                         std::tie(B.ModuleName, B.InputIndex);
281                });
282 
283     llvm::sort(Inputs, [](const InputDeps &A, const InputDeps &B) {
284       return A.FileName < B.FileName;
285     });
286 
287     using namespace llvm::json;
288 
289     Array OutModules;
290     for (auto &&ModName : ModuleNames) {
291       auto &MD = Modules[ModName];
292       Object O{
293           {"name", MD.ModuleName},
294           {"context-hash", MD.ContextHash},
295           {"file-deps", toJSONSorted(MD.FileDeps)},
296           {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)},
297           {"clang-modulemap-file", MD.ClangModuleMapFile},
298           {"command-line",
299            FullCommandLine
300                ? MD.getFullCommandLine(
301                      [&](ClangModuleDep CMD) { return lookupPCMPath(CMD); },
302                      [&](ClangModuleDep CMD) -> const ModuleDeps & {
303                        return lookupModuleDeps(CMD);
304                      })
305                : MD.NonPathCommandLine},
306       };
307       OutModules.push_back(std::move(O));
308     }
309 
310     Array TUs;
311     for (auto &&I : Inputs) {
312       Object O{
313           {"input-file", I.FileName},
314           {"clang-context-hash", I.ContextHash},
315           {"file-deps", I.FileDeps},
316           {"clang-module-deps", toJSONSorted(I.ModuleDeps)},
317           {"command-line", I.AdditonalCommandLine},
318       };
319       TUs.push_back(std::move(O));
320     }
321 
322     Object Output{
323         {"modules", std::move(OutModules)},
324         {"translation-units", std::move(TUs)},
325     };
326 
327     OS << llvm::formatv("{0:2}\n", Value(std::move(Output)));
328   }
329 
330 private:
lookupPCMPath(ClangModuleDep CMD)331   StringRef lookupPCMPath(ClangModuleDep CMD) {
332     return Modules[ContextModulePair{CMD.ContextHash, CMD.ModuleName, 0}]
333         .ImplicitModulePCMPath;
334   }
335 
lookupModuleDeps(ClangModuleDep CMD)336   const ModuleDeps &lookupModuleDeps(ClangModuleDep CMD) {
337     auto I =
338         Modules.find(ContextModulePair{CMD.ContextHash, CMD.ModuleName, 0});
339     assert(I != Modules.end());
340     return I->second;
341   };
342 
343   struct ContextModulePair {
344     std::string ContextHash;
345     std::string ModuleName;
346     mutable size_t InputIndex;
347 
operator ==FullDeps::ContextModulePair348     bool operator==(const ContextModulePair &Other) const {
349       return ContextHash == Other.ContextHash && ModuleName == Other.ModuleName;
350     }
351   };
352 
353   struct ContextModulePairHasher {
operator ()FullDeps::ContextModulePairHasher354     std::size_t operator()(const ContextModulePair &CMP) const {
355       using llvm::hash_combine;
356 
357       return hash_combine(CMP.ContextHash, CMP.ModuleName);
358     }
359   };
360 
361   struct InputDeps {
362     std::string FileName;
363     std::string ContextHash;
364     std::vector<std::string> FileDeps;
365     std::vector<ClangModuleDep> ModuleDeps;
366     std::vector<std::string> AdditonalCommandLine;
367   };
368 
369   std::mutex Lock;
370   std::unordered_map<ContextModulePair, ModuleDeps, ContextModulePairHasher>
371       Modules;
372   std::vector<InputDeps> Inputs;
373 };
374 
handleFullDependencyToolResult(const std::string & Input,llvm::Expected<FullDependenciesResult> & MaybeFullDeps,FullDeps & FD,size_t InputIndex,SharedStream & OS,SharedStream & Errs)375 static bool handleFullDependencyToolResult(
376     const std::string &Input,
377     llvm::Expected<FullDependenciesResult> &MaybeFullDeps, FullDeps &FD,
378     size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
379   if (!MaybeFullDeps) {
380     llvm::handleAllErrors(
381         MaybeFullDeps.takeError(), [&Input, &Errs](llvm::StringError &Err) {
382           Errs.applyLocked([&](raw_ostream &OS) {
383             OS << "Error while scanning dependencies for " << Input << ":\n";
384             OS << Err.getMessage();
385           });
386         });
387     return true;
388   }
389   FD.mergeDeps(Input, std::move(*MaybeFullDeps), InputIndex);
390   return false;
391 }
392 
main(int argc,const char ** argv)393 int main(int argc, const char **argv) {
394   llvm::InitLLVM X(argc, argv);
395   llvm::cl::HideUnrelatedOptions(DependencyScannerCategory);
396   if (!llvm::cl::ParseCommandLineOptions(argc, argv))
397     return 1;
398 
399   std::string ErrorMessage;
400   std::unique_ptr<tooling::JSONCompilationDatabase> Compilations =
401       tooling::JSONCompilationDatabase::loadFromFile(
402           CompilationDB, ErrorMessage,
403           tooling::JSONCommandLineSyntax::AutoDetect);
404   if (!Compilations) {
405     llvm::errs() << "error: " << ErrorMessage << "\n";
406     return 1;
407   }
408 
409   llvm::cl::PrintOptionValues();
410 
411   // The command options are rewritten to run Clang in preprocessor only mode.
412   auto AdjustingCompilations =
413       std::make_unique<tooling::ArgumentsAdjustingCompilations>(
414           std::move(Compilations));
415   ResourceDirectoryCache ResourceDirCache;
416   AdjustingCompilations->appendArgumentsAdjuster(
417       [&ResourceDirCache](const tooling::CommandLineArguments &Args,
418                           StringRef FileName) {
419         std::string LastO = "";
420         bool HasMT = false;
421         bool HasMQ = false;
422         bool HasMD = false;
423         bool HasResourceDir = false;
424         // We need to find the last -o value.
425         if (!Args.empty()) {
426           std::size_t Idx = Args.size() - 1;
427           for (auto It = Args.rbegin(); It != Args.rend(); ++It) {
428             if (It != Args.rbegin()) {
429               if (Args[Idx] == "-o")
430                 LastO = Args[Idx + 1];
431               if (Args[Idx] == "-MT")
432                 HasMT = true;
433               if (Args[Idx] == "-MQ")
434                 HasMQ = true;
435               if (Args[Idx] == "-MD")
436                 HasMD = true;
437               if (Args[Idx] == "-resource-dir")
438                 HasResourceDir = true;
439             }
440             --Idx;
441           }
442         }
443         // If there's no -MT/-MQ Driver would add -MT with the value of the last
444         // -o option.
445         tooling::CommandLineArguments AdjustedArgs = Args;
446         AdjustedArgs.push_back("-o");
447         AdjustedArgs.push_back("/dev/null");
448         if (!HasMT && !HasMQ) {
449           AdjustedArgs.push_back("-M");
450           AdjustedArgs.push_back("-MT");
451           // We're interested in source dependencies of an object file.
452           if (!HasMD) {
453             // FIXME: We are missing the directory unless the -o value is an
454             // absolute path.
455             AdjustedArgs.push_back(!LastO.empty() ? LastO
456                                                   : getObjFilePath(FileName));
457           } else {
458             AdjustedArgs.push_back(std::string(FileName));
459           }
460         }
461         AdjustedArgs.push_back("-Xclang");
462         AdjustedArgs.push_back("-Eonly");
463         AdjustedArgs.push_back("-Xclang");
464         AdjustedArgs.push_back("-sys-header-deps");
465         AdjustedArgs.push_back("-Wno-error");
466 
467         if (!HasResourceDir) {
468           StringRef ResourceDir =
469               ResourceDirCache.findResourceDir(Args);
470           if (!ResourceDir.empty()) {
471             AdjustedArgs.push_back("-resource-dir");
472             AdjustedArgs.push_back(std::string(ResourceDir));
473           }
474         }
475         return AdjustedArgs;
476       });
477   AdjustingCompilations->appendArgumentsAdjuster(
478       tooling::getClangStripSerializeDiagnosticAdjuster());
479 
480   SharedStream Errs(llvm::errs());
481   // Print out the dependency results to STDOUT by default.
482   SharedStream DependencyOS(llvm::outs());
483 
484   DependencyScanningService Service(ScanMode, Format, ReuseFileManager,
485                                     SkipExcludedPPRanges);
486   llvm::ThreadPool Pool(llvm::hardware_concurrency(NumThreads));
487   std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
488   for (unsigned I = 0; I < Pool.getThreadCount(); ++I)
489     WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
490 
491   std::vector<SingleCommandCompilationDatabase> Inputs;
492   for (tooling::CompileCommand Cmd :
493        AdjustingCompilations->getAllCompileCommands())
494     Inputs.emplace_back(Cmd);
495 
496   std::atomic<bool> HadErrors(false);
497   FullDeps FD;
498   std::mutex Lock;
499   size_t Index = 0;
500 
501   if (Verbose) {
502     llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
503                  << " files using " << Pool.getThreadCount() << " workers\n";
504   }
505   for (unsigned I = 0; I < Pool.getThreadCount(); ++I) {
506     Pool.async([I, &Lock, &Index, &Inputs, &HadErrors, &FD, &WorkerTools,
507                 &DependencyOS, &Errs]() {
508       llvm::StringSet<> AlreadySeenModules;
509       while (true) {
510         const SingleCommandCompilationDatabase *Input;
511         std::string Filename;
512         std::string CWD;
513         size_t LocalIndex;
514         // Take the next input.
515         {
516           std::unique_lock<std::mutex> LockGuard(Lock);
517           if (Index >= Inputs.size())
518             return;
519           LocalIndex = Index;
520           Input = &Inputs[Index++];
521           tooling::CompileCommand Cmd = Input->getAllCompileCommands()[0];
522           Filename = std::move(Cmd.Filename);
523           CWD = std::move(Cmd.Directory);
524         }
525         // Run the tool on it.
526         if (Format == ScanningOutputFormat::Make) {
527           auto MaybeFile = WorkerTools[I]->getDependencyFile(*Input, CWD);
528           if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
529                                              Errs))
530             HadErrors = true;
531         } else {
532           auto MaybeFullDeps = WorkerTools[I]->getFullDependencies(
533               *Input, CWD, AlreadySeenModules);
534           if (handleFullDependencyToolResult(Filename, MaybeFullDeps, FD,
535                                              LocalIndex, DependencyOS, Errs))
536             HadErrors = true;
537         }
538       }
539     });
540   }
541   Pool.wait();
542 
543   if (Format == ScanningOutputFormat::Full)
544     FD.printFullOutput(llvm::outs());
545 
546   return HadErrors;
547 }
548