1 //===- DependencyScanningWorker.cpp - clang-scan-deps worker --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
10 #include "clang/Basic/DiagnosticDriver.h"
11 #include "clang/Basic/DiagnosticFrontend.h"
12 #include "clang/CodeGen/ObjectFilePCHContainerOperations.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/Driver.h"
15 #include "clang/Driver/Job.h"
16 #include "clang/Driver/Tool.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 #include "clang/Frontend/CompilerInvocation.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "clang/Frontend/TextDiagnosticPrinter.h"
21 #include "clang/Frontend/Utils.h"
22 #include "clang/Lex/PreprocessorOptions.h"
23 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
24 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
25 #include "clang/Tooling/Tooling.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/TargetParser/Host.h"
29 #include <optional>
30 
31 using namespace clang;
32 using namespace tooling;
33 using namespace dependencies;
34 
35 namespace {
36 
37 /// Forwards the gatherered dependencies to the consumer.
38 class DependencyConsumerForwarder : public DependencyFileGenerator {
39 public:
40   DependencyConsumerForwarder(std::unique_ptr<DependencyOutputOptions> Opts,
41                               StringRef WorkingDirectory, DependencyConsumer &C)
42       : DependencyFileGenerator(*Opts), WorkingDirectory(WorkingDirectory),
43         Opts(std::move(Opts)), C(C) {}
44 
45   void finishedMainFile(DiagnosticsEngine &Diags) override {
46     C.handleDependencyOutputOpts(*Opts);
47     llvm::SmallString<256> CanonPath;
48     for (const auto &File : getDependencies()) {
49       CanonPath = File;
50       llvm::sys::path::remove_dots(CanonPath, /*remove_dot_dot=*/true);
51       llvm::sys::fs::make_absolute(WorkingDirectory, CanonPath);
52       C.handleFileDependency(CanonPath);
53     }
54   }
55 
56 private:
57   StringRef WorkingDirectory;
58   std::unique_ptr<DependencyOutputOptions> Opts;
59   DependencyConsumer &C;
60 };
61 
62 using PrebuiltModuleFilesT = decltype(HeaderSearchOptions::PrebuiltModuleFiles);
63 
64 /// A listener that collects the imported modules and optionally the input
65 /// files.
66 class PrebuiltModuleListener : public ASTReaderListener {
67 public:
68   PrebuiltModuleListener(PrebuiltModuleFilesT &PrebuiltModuleFiles,
69                          llvm::SmallVector<std::string> &NewModuleFiles)
70       : PrebuiltModuleFiles(PrebuiltModuleFiles),
71         NewModuleFiles(NewModuleFiles) {}
72 
73   bool needsImportVisitation() const override { return true; }
74 
75   void visitImport(StringRef ModuleName, StringRef Filename) override {
76     if (PrebuiltModuleFiles.insert({ModuleName.str(), Filename.str()}).second)
77       NewModuleFiles.push_back(Filename.str());
78   }
79 
80 private:
81   PrebuiltModuleFilesT &PrebuiltModuleFiles;
82   llvm::SmallVector<std::string> &NewModuleFiles;
83 };
84 
85 /// Visit the given prebuilt module and collect all of the modules it
86 /// transitively imports and contributing input files.
87 static void visitPrebuiltModule(StringRef PrebuiltModuleFilename,
88                                 CompilerInstance &CI,
89                                 PrebuiltModuleFilesT &ModuleFiles) {
90   // List of module files to be processed.
91   llvm::SmallVector<std::string> Worklist{PrebuiltModuleFilename.str()};
92   PrebuiltModuleListener Listener(ModuleFiles, Worklist);
93 
94   while (!Worklist.empty())
95     ASTReader::readASTFileControlBlock(
96         Worklist.pop_back_val(), CI.getFileManager(), CI.getModuleCache(),
97         CI.getPCHContainerReader(),
98         /*FindModuleFileExtensions=*/false, Listener,
99         /*ValidateDiagnosticOptions=*/false);
100 }
101 
102 /// Transform arbitrary file name into an object-like file name.
103 static std::string makeObjFileName(StringRef FileName) {
104   SmallString<128> ObjFileName(FileName);
105   llvm::sys::path::replace_extension(ObjFileName, "o");
106   return std::string(ObjFileName.str());
107 }
108 
109 /// Deduce the dependency target based on the output file and input files.
110 static std::string
111 deduceDepTarget(const std::string &OutputFile,
112                 const SmallVectorImpl<FrontendInputFile> &InputFiles) {
113   if (OutputFile != "-")
114     return OutputFile;
115 
116   if (InputFiles.empty() || !InputFiles.front().isFile())
117     return "clang-scan-deps\\ dependency";
118 
119   return makeObjFileName(InputFiles.front().getFile());
120 }
121 
122 /// Sanitize diagnostic options for dependency scan.
123 static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
124   // Don't print 'X warnings and Y errors generated'.
125   DiagOpts.ShowCarets = false;
126   // Don't write out diagnostic file.
127   DiagOpts.DiagnosticSerializationFile.clear();
128   // Don't emit warnings as errors (and all other warnings too).
129   DiagOpts.IgnoreWarnings = true;
130 }
131 
132 /// A clang tool that runs the preprocessor in a mode that's optimized for
133 /// dependency scanning for the given compiler invocation.
134 class DependencyScanningAction : public tooling::ToolAction {
135 public:
136   DependencyScanningAction(
137       StringRef WorkingDirectory, DependencyConsumer &Consumer,
138       DependencyActionController &Controller,
139       llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
140       ScanningOutputFormat Format, ScanningOptimizations OptimizeArgs,
141       bool EagerLoadModules, bool DisableFree,
142       std::optional<StringRef> ModuleName = std::nullopt)
143       : WorkingDirectory(WorkingDirectory), Consumer(Consumer),
144         Controller(Controller), DepFS(std::move(DepFS)), Format(Format),
145         OptimizeArgs(OptimizeArgs), EagerLoadModules(EagerLoadModules),
146         DisableFree(DisableFree), ModuleName(ModuleName) {}
147 
148   bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
149                      FileManager *FileMgr,
150                      std::shared_ptr<PCHContainerOperations> PCHContainerOps,
151                      DiagnosticConsumer *DiagConsumer) override {
152     // Make a deep copy of the original Clang invocation.
153     CompilerInvocation OriginalInvocation(*Invocation);
154     // Restore the value of DisableFree, which may be modified by Tooling.
155     OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
156 
157     if (Scanned) {
158       // Scanning runs once for the first -cc1 invocation in a chain of driver
159       // jobs. For any dependent jobs, reuse the scanning result and just
160       // update the LastCC1Arguments to correspond to the new invocation.
161       // FIXME: to support multi-arch builds, each arch requires a separate scan
162       setLastCC1Arguments(std::move(OriginalInvocation));
163       return true;
164     }
165 
166     Scanned = true;
167 
168     // Create a compiler instance to handle the actual work.
169     ScanInstanceStorage.emplace(std::move(PCHContainerOps));
170     CompilerInstance &ScanInstance = *ScanInstanceStorage;
171     ScanInstance.setInvocation(std::move(Invocation));
172 
173     // Create the compiler's actual diagnostics engine.
174     sanitizeDiagOpts(ScanInstance.getDiagnosticOpts());
175     ScanInstance.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false);
176     if (!ScanInstance.hasDiagnostics())
177       return false;
178 
179     ScanInstance.getPreprocessorOpts().AllowPCHWithDifferentModulesCachePath =
180         true;
181 
182     ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
183     ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
184     ScanInstance.getFrontendOpts().ModulesShareFileManager = false;
185     ScanInstance.getHeaderSearchOpts().ModuleFormat = "raw";
186 
187     ScanInstance.setFileManager(FileMgr);
188     // Support for virtual file system overlays.
189     FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
190         ScanInstance.getInvocation(), ScanInstance.getDiagnostics(),
191         FileMgr->getVirtualFileSystemPtr()));
192 
193     ScanInstance.createSourceManager(*FileMgr);
194 
195     // Store the list of prebuilt module files into header search options. This
196     // will prevent the implicit build to create duplicate modules and will
197     // force reuse of the existing prebuilt module files instead.
198     if (!ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
199       visitPrebuiltModule(
200           ScanInstance.getPreprocessorOpts().ImplicitPCHInclude, ScanInstance,
201           ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles);
202 
203     // Use the dependency scanning optimized file system if requested to do so.
204     if (DepFS) {
205       llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> LocalDepFS =
206           DepFS;
207       ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
208           [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File)
209           -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
210         if (llvm::ErrorOr<EntryRef> Entry =
211                 LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
212           return Entry->getDirectiveTokens();
213         return std::nullopt;
214       };
215     }
216 
217     // Create the dependency collector that will collect the produced
218     // dependencies.
219     //
220     // This also moves the existing dependency output options from the
221     // invocation to the collector. The options in the invocation are reset,
222     // which ensures that the compiler won't create new dependency collectors,
223     // and thus won't write out the extra '.d' files to disk.
224     auto Opts = std::make_unique<DependencyOutputOptions>();
225     std::swap(*Opts, ScanInstance.getInvocation().getDependencyOutputOpts());
226     // We need at least one -MT equivalent for the generator of make dependency
227     // files to work.
228     if (Opts->Targets.empty())
229       Opts->Targets = {
230           deduceDepTarget(ScanInstance.getFrontendOpts().OutputFile,
231                           ScanInstance.getFrontendOpts().Inputs)};
232     Opts->IncludeSystemHeaders = true;
233 
234     switch (Format) {
235     case ScanningOutputFormat::Make:
236       ScanInstance.addDependencyCollector(
237           std::make_shared<DependencyConsumerForwarder>(
238               std::move(Opts), WorkingDirectory, Consumer));
239       break;
240     case ScanningOutputFormat::P1689:
241     case ScanningOutputFormat::Full:
242       MDC = std::make_shared<ModuleDepCollector>(
243           std::move(Opts), ScanInstance, Consumer, Controller,
244           OriginalInvocation, OptimizeArgs, EagerLoadModules,
245           Format == ScanningOutputFormat::P1689);
246       ScanInstance.addDependencyCollector(MDC);
247       break;
248     }
249 
250     // Consider different header search and diagnostic options to create
251     // different modules. This avoids the unsound aliasing of module PCMs.
252     //
253     // TODO: Implement diagnostic bucketing to reduce the impact of strict
254     // context hashing.
255     ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
256     ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
257     ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
258     ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings =
259         true;
260 
261     // Avoid some checks and module map parsing when loading PCM files.
262     ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false;
263 
264     std::unique_ptr<FrontendAction> Action;
265 
266     if (ModuleName)
267       Action = std::make_unique<GetDependenciesByModuleNameAction>(*ModuleName);
268     else
269       Action = std::make_unique<ReadPCHAndPreprocessAction>();
270 
271     const bool Result = ScanInstance.ExecuteAction(*Action);
272 
273     if (Result)
274       setLastCC1Arguments(std::move(OriginalInvocation));
275 
276     return Result;
277   }
278 
279   bool hasScanned() const { return Scanned; }
280 
281   /// Take the cc1 arguments corresponding to the most recent invocation used
282   /// with this action. Any modifications implied by the discovered dependencies
283   /// will have already been applied.
284   std::vector<std::string> takeLastCC1Arguments() {
285     std::vector<std::string> Result;
286     std::swap(Result, LastCC1Arguments); // Reset LastCC1Arguments to empty.
287     return Result;
288   }
289 
290 private:
291   void setLastCC1Arguments(CompilerInvocation &&CI) {
292     if (MDC)
293       MDC->applyDiscoveredDependencies(CI);
294     LastCC1Arguments = CI.getCC1CommandLine();
295   }
296 
297 private:
298   StringRef WorkingDirectory;
299   DependencyConsumer &Consumer;
300   DependencyActionController &Controller;
301   llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
302   ScanningOutputFormat Format;
303   ScanningOptimizations OptimizeArgs;
304   bool EagerLoadModules;
305   bool DisableFree;
306   std::optional<StringRef> ModuleName;
307   std::optional<CompilerInstance> ScanInstanceStorage;
308   std::shared_ptr<ModuleDepCollector> MDC;
309   std::vector<std::string> LastCC1Arguments;
310   bool Scanned = false;
311 };
312 
313 } // end anonymous namespace
314 
315 DependencyScanningWorker::DependencyScanningWorker(
316     DependencyScanningService &Service,
317     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
318     : Format(Service.getFormat()), OptimizeArgs(Service.getOptimizeArgs()),
319       EagerLoadModules(Service.shouldEagerLoadModules()) {
320   PCHContainerOps = std::make_shared<PCHContainerOperations>();
321   // We need to read object files from PCH built outside the scanner.
322   PCHContainerOps->registerReader(
323       std::make_unique<ObjectFilePCHContainerReader>());
324   // The scanner itself writes only raw ast files.
325   PCHContainerOps->registerWriter(std::make_unique<RawPCHContainerWriter>());
326 
327   switch (Service.getMode()) {
328   case ScanningMode::DependencyDirectivesScan:
329     DepFS =
330         new DependencyScanningWorkerFilesystem(Service.getSharedCache(), FS);
331     BaseFS = DepFS;
332     break;
333   case ScanningMode::CanonicalPreprocessing:
334     DepFS = nullptr;
335     BaseFS = FS;
336     break;
337   }
338 }
339 
340 llvm::Error DependencyScanningWorker::computeDependencies(
341     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
342     DependencyConsumer &Consumer, DependencyActionController &Controller,
343     std::optional<StringRef> ModuleName) {
344   std::vector<const char *> CLI;
345   for (const std::string &Arg : CommandLine)
346     CLI.push_back(Arg.c_str());
347   auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
348   sanitizeDiagOpts(*DiagOpts);
349 
350   // Capture the emitted diagnostics and report them to the client
351   // in the case of a failure.
352   std::string DiagnosticOutput;
353   llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
354   TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.release());
355 
356   if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
357                           DiagPrinter, ModuleName))
358     return llvm::Error::success();
359   return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
360                                              llvm::inconvertibleErrorCode());
361 }
362 
363 static bool forEachDriverJob(
364     ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, FileManager &FM,
365     llvm::function_ref<bool(const driver::Command &Cmd)> Callback) {
366   SmallVector<const char *, 256> Argv;
367   Argv.reserve(ArgStrs.size());
368   for (const std::string &Arg : ArgStrs)
369     Argv.push_back(Arg.c_str());
370 
371   llvm::vfs::FileSystem *FS = &FM.getVirtualFileSystem();
372 
373   std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>(
374       Argv[0], llvm::sys::getDefaultTargetTriple(), Diags,
375       "clang LLVM compiler", FS);
376   Driver->setTitle("clang_based_tool");
377 
378   llvm::BumpPtrAllocator Alloc;
379   bool CLMode = driver::IsClangCL(
380       driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1)));
381 
382   if (llvm::Error E = driver::expandResponseFiles(Argv, CLMode, Alloc, FS)) {
383     Diags.Report(diag::err_drv_expand_response_file)
384         << llvm::toString(std::move(E));
385     return false;
386   }
387 
388   const std::unique_ptr<driver::Compilation> Compilation(
389       Driver->BuildCompilation(llvm::ArrayRef(Argv)));
390   if (!Compilation)
391     return false;
392 
393   if (Compilation->containsError())
394     return false;
395 
396   for (const driver::Command &Job : Compilation->getJobs()) {
397     if (!Callback(Job))
398       return false;
399   }
400   return true;
401 }
402 
403 static bool createAndRunToolInvocation(
404     std::vector<std::string> CommandLine, DependencyScanningAction &Action,
405     FileManager &FM,
406     std::shared_ptr<clang::PCHContainerOperations> &PCHContainerOps,
407     DiagnosticsEngine &Diags, DependencyConsumer &Consumer) {
408 
409   // Save executable path before providing CommandLine to ToolInvocation
410   std::string Executable = CommandLine[0];
411   ToolInvocation Invocation(std::move(CommandLine), &Action, &FM,
412                             PCHContainerOps);
413   Invocation.setDiagnosticConsumer(Diags.getClient());
414   Invocation.setDiagnosticOptions(&Diags.getDiagnosticOptions());
415   if (!Invocation.run())
416     return false;
417 
418   std::vector<std::string> Args = Action.takeLastCC1Arguments();
419   Consumer.handleBuildCommand({std::move(Executable), std::move(Args)});
420   return true;
421 }
422 
423 bool DependencyScanningWorker::computeDependencies(
424     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
425     DependencyConsumer &Consumer, DependencyActionController &Controller,
426     DiagnosticConsumer &DC, std::optional<StringRef> ModuleName) {
427   // Reset what might have been modified in the previous worker invocation.
428   BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
429 
430   std::optional<std::vector<std::string>> ModifiedCommandLine;
431   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
432 
433   // If we're scanning based on a module name alone, we don't expect the client
434   // to provide us with an input file. However, the driver really wants to have
435   // one. Let's just make it up to make the driver happy.
436   if (ModuleName) {
437     auto OverlayFS =
438         llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
439     auto InMemoryFS =
440         llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
441     InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
442     OverlayFS->pushOverlay(InMemoryFS);
443     ModifiedFS = OverlayFS;
444 
445     SmallString<128> FakeInputPath;
446     // TODO: We should retry the creation if the path already exists.
447     llvm::sys::fs::createUniquePath(*ModuleName + "-%%%%%%%%.input",
448                                     FakeInputPath,
449                                     /*MakeAbsolute=*/false);
450     InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
451 
452     ModifiedCommandLine = CommandLine;
453     ModifiedCommandLine->emplace_back(FakeInputPath);
454   }
455 
456   const std::vector<std::string> &FinalCommandLine =
457       ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
458   auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
459 
460   FileSystemOptions FSOpts;
461   FSOpts.WorkingDir = WorkingDirectory.str();
462   auto FileMgr = llvm::makeIntrusiveRefCnt<FileManager>(FSOpts, FinalFS);
463 
464   std::vector<const char *> FinalCCommandLine(FinalCommandLine.size(), nullptr);
465   llvm::transform(FinalCommandLine, FinalCCommandLine.begin(),
466                   [](const std::string &Str) { return Str.c_str(); });
467 
468   auto DiagOpts = CreateAndPopulateDiagOpts(FinalCCommandLine);
469   sanitizeDiagOpts(*DiagOpts);
470   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
471       CompilerInstance::createDiagnostics(DiagOpts.release(), &DC,
472                                           /*ShouldOwnClient=*/false);
473 
474   // Although `Diagnostics` are used only for command-line parsing, the
475   // custom `DiagConsumer` might expect a `SourceManager` to be present.
476   SourceManager SrcMgr(*Diags, *FileMgr);
477   Diags->setSourceManager(&SrcMgr);
478   // DisableFree is modified by Tooling for running
479   // in-process; preserve the original value, which is
480   // always true for a driver invocation.
481   bool DisableFree = true;
482   DependencyScanningAction Action(WorkingDirectory, Consumer, Controller, DepFS,
483                                   Format, OptimizeArgs, EagerLoadModules,
484                                   DisableFree, ModuleName);
485 
486   bool Success = false;
487   if (FinalCommandLine[1] == "-cc1") {
488     Success = createAndRunToolInvocation(FinalCommandLine, Action, *FileMgr,
489                                          PCHContainerOps, *Diags, Consumer);
490   } else {
491     Success = forEachDriverJob(
492         FinalCommandLine, *Diags, *FileMgr, [&](const driver::Command &Cmd) {
493           if (StringRef(Cmd.getCreator().getName()) != "clang") {
494             // Non-clang command. Just pass through to the dependency
495             // consumer.
496             Consumer.handleBuildCommand(
497                 {Cmd.getExecutable(),
498                  {Cmd.getArguments().begin(), Cmd.getArguments().end()}});
499             return true;
500           }
501 
502           // Insert -cc1 comand line options into Argv
503           std::vector<std::string> Argv;
504           Argv.push_back(Cmd.getExecutable());
505           Argv.insert(Argv.end(), Cmd.getArguments().begin(),
506                       Cmd.getArguments().end());
507 
508           // Create an invocation that uses the underlying file
509           // system to ensure that any file system requests that
510           // are made by the driver do not go through the
511           // dependency scanning filesystem.
512           return createAndRunToolInvocation(std::move(Argv), Action, *FileMgr,
513                                             PCHContainerOps, *Diags, Consumer);
514         });
515   }
516 
517   if (Success && !Action.hasScanned())
518     Diags->Report(diag::err_fe_expected_compiler_job)
519         << llvm::join(FinalCommandLine, " ");
520   return Success && Action.hasScanned();
521 }
522 
523 DependencyActionController::~DependencyActionController() {}
524