1 //===- DependencyScanningWorker.cpp - clang-scan-deps worker --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
10 #include "clang/Basic/DiagnosticDriver.h"
11 #include "clang/Basic/DiagnosticFrontend.h"
12 #include "clang/CodeGen/ObjectFilePCHContainerOperations.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/Driver.h"
15 #include "clang/Driver/Job.h"
16 #include "clang/Driver/Tool.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 #include "clang/Frontend/CompilerInvocation.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "clang/Frontend/TextDiagnosticPrinter.h"
21 #include "clang/Frontend/Utils.h"
22 #include "clang/Lex/PreprocessorOptions.h"
23 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
24 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
25 #include "clang/Tooling/Tooling.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/TargetParser/Host.h"
29 #include <optional>
30 
31 using namespace clang;
32 using namespace tooling;
33 using namespace dependencies;
34 
35 namespace {
36 
37 /// Forwards the gatherered dependencies to the consumer.
38 class DependencyConsumerForwarder : public DependencyFileGenerator {
39 public:
40   DependencyConsumerForwarder(std::unique_ptr<DependencyOutputOptions> Opts,
41                               StringRef WorkingDirectory, DependencyConsumer &C)
42       : DependencyFileGenerator(*Opts), WorkingDirectory(WorkingDirectory),
43         Opts(std::move(Opts)), C(C) {}
44 
45   void finishedMainFile(DiagnosticsEngine &Diags) override {
46     C.handleDependencyOutputOpts(*Opts);
47     llvm::SmallString<256> CanonPath;
48     for (const auto &File : getDependencies()) {
49       CanonPath = File;
50       llvm::sys::path::remove_dots(CanonPath, /*remove_dot_dot=*/true);
51       llvm::sys::fs::make_absolute(WorkingDirectory, CanonPath);
52       C.handleFileDependency(CanonPath);
53     }
54   }
55 
56 private:
57   StringRef WorkingDirectory;
58   std::unique_ptr<DependencyOutputOptions> Opts;
59   DependencyConsumer &C;
60 };
61 
62 using PrebuiltModuleFilesT = decltype(HeaderSearchOptions::PrebuiltModuleFiles);
63 
64 /// A listener that collects the imported modules and optionally the input
65 /// files.
66 class PrebuiltModuleListener : public ASTReaderListener {
67 public:
68   PrebuiltModuleListener(PrebuiltModuleFilesT &PrebuiltModuleFiles,
69                          llvm::SmallVector<std::string> &NewModuleFiles)
70       : PrebuiltModuleFiles(PrebuiltModuleFiles),
71         NewModuleFiles(NewModuleFiles) {}
72 
73   bool needsImportVisitation() const override { return true; }
74 
75   void visitImport(StringRef ModuleName, StringRef Filename) override {
76     if (PrebuiltModuleFiles.insert({ModuleName.str(), Filename.str()}).second)
77       NewModuleFiles.push_back(Filename.str());
78   }
79 
80 private:
81   PrebuiltModuleFilesT &PrebuiltModuleFiles;
82   llvm::SmallVector<std::string> &NewModuleFiles;
83 };
84 
85 /// Visit the given prebuilt module and collect all of the modules it
86 /// transitively imports and contributing input files.
87 static void visitPrebuiltModule(StringRef PrebuiltModuleFilename,
88                                 CompilerInstance &CI,
89                                 PrebuiltModuleFilesT &ModuleFiles) {
90   // List of module files to be processed.
91   llvm::SmallVector<std::string> Worklist{PrebuiltModuleFilename.str()};
92   PrebuiltModuleListener Listener(ModuleFiles, Worklist);
93 
94   while (!Worklist.empty())
95     ASTReader::readASTFileControlBlock(
96         Worklist.pop_back_val(), CI.getFileManager(), CI.getModuleCache(),
97         CI.getPCHContainerReader(),
98         /*FindModuleFileExtensions=*/false, Listener,
99         /*ValidateDiagnosticOptions=*/false);
100 }
101 
102 /// Transform arbitrary file name into an object-like file name.
103 static std::string makeObjFileName(StringRef FileName) {
104   SmallString<128> ObjFileName(FileName);
105   llvm::sys::path::replace_extension(ObjFileName, "o");
106   return std::string(ObjFileName.str());
107 }
108 
109 /// Deduce the dependency target based on the output file and input files.
110 static std::string
111 deduceDepTarget(const std::string &OutputFile,
112                 const SmallVectorImpl<FrontendInputFile> &InputFiles) {
113   if (OutputFile != "-")
114     return OutputFile;
115 
116   if (InputFiles.empty() || !InputFiles.front().isFile())
117     return "clang-scan-deps\\ dependency";
118 
119   return makeObjFileName(InputFiles.front().getFile());
120 }
121 
122 /// Sanitize diagnostic options for dependency scan.
123 static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
124   // Don't print 'X warnings and Y errors generated'.
125   DiagOpts.ShowCarets = false;
126   // Don't write out diagnostic file.
127   DiagOpts.DiagnosticSerializationFile.clear();
128   // Don't emit warnings as errors (and all other warnings too).
129   DiagOpts.IgnoreWarnings = true;
130 }
131 
132 /// A clang tool that runs the preprocessor in a mode that's optimized for
133 /// dependency scanning for the given compiler invocation.
134 class DependencyScanningAction : public tooling::ToolAction {
135 public:
136   DependencyScanningAction(
137       StringRef WorkingDirectory, DependencyConsumer &Consumer,
138       DependencyActionController &Controller,
139       llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
140       ScanningOutputFormat Format, bool OptimizeArgs, bool EagerLoadModules,
141       bool DisableFree, std::optional<StringRef> ModuleName = std::nullopt)
142       : WorkingDirectory(WorkingDirectory), Consumer(Consumer),
143         Controller(Controller), DepFS(std::move(DepFS)), Format(Format),
144         OptimizeArgs(OptimizeArgs), EagerLoadModules(EagerLoadModules),
145         DisableFree(DisableFree), ModuleName(ModuleName) {}
146 
147   bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
148                      FileManager *FileMgr,
149                      std::shared_ptr<PCHContainerOperations> PCHContainerOps,
150                      DiagnosticConsumer *DiagConsumer) override {
151     // Make a deep copy of the original Clang invocation.
152     CompilerInvocation OriginalInvocation(*Invocation);
153     // Restore the value of DisableFree, which may be modified by Tooling.
154     OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
155 
156     if (Scanned) {
157       // Scanning runs once for the first -cc1 invocation in a chain of driver
158       // jobs. For any dependent jobs, reuse the scanning result and just
159       // update the LastCC1Arguments to correspond to the new invocation.
160       // FIXME: to support multi-arch builds, each arch requires a separate scan
161       setLastCC1Arguments(std::move(OriginalInvocation));
162       return true;
163     }
164 
165     Scanned = true;
166 
167     // Create a compiler instance to handle the actual work.
168     ScanInstanceStorage.emplace(std::move(PCHContainerOps));
169     CompilerInstance &ScanInstance = *ScanInstanceStorage;
170     ScanInstance.setInvocation(std::move(Invocation));
171 
172     // Create the compiler's actual diagnostics engine.
173     sanitizeDiagOpts(ScanInstance.getDiagnosticOpts());
174     ScanInstance.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false);
175     if (!ScanInstance.hasDiagnostics())
176       return false;
177 
178     ScanInstance.getPreprocessorOpts().AllowPCHWithDifferentModulesCachePath =
179         true;
180 
181     ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
182     ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
183     ScanInstance.getFrontendOpts().ModulesShareFileManager = false;
184     ScanInstance.getHeaderSearchOpts().ModuleFormat = "raw";
185 
186     ScanInstance.setFileManager(FileMgr);
187     // Support for virtual file system overlays.
188     FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
189         ScanInstance.getInvocation(), ScanInstance.getDiagnostics(),
190         FileMgr->getVirtualFileSystemPtr()));
191 
192     ScanInstance.createSourceManager(*FileMgr);
193 
194     // Store the list of prebuilt module files into header search options. This
195     // will prevent the implicit build to create duplicate modules and will
196     // force reuse of the existing prebuilt module files instead.
197     if (!ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
198       visitPrebuiltModule(
199           ScanInstance.getPreprocessorOpts().ImplicitPCHInclude, ScanInstance,
200           ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles);
201 
202     // Use the dependency scanning optimized file system if requested to do so.
203     if (DepFS) {
204       llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> LocalDepFS =
205           DepFS;
206       ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
207           [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File)
208           -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
209         if (llvm::ErrorOr<EntryRef> Entry =
210                 LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
211           return Entry->getDirectiveTokens();
212         return std::nullopt;
213       };
214     }
215 
216     // Create the dependency collector that will collect the produced
217     // dependencies.
218     //
219     // This also moves the existing dependency output options from the
220     // invocation to the collector. The options in the invocation are reset,
221     // which ensures that the compiler won't create new dependency collectors,
222     // and thus won't write out the extra '.d' files to disk.
223     auto Opts = std::make_unique<DependencyOutputOptions>();
224     std::swap(*Opts, ScanInstance.getInvocation().getDependencyOutputOpts());
225     // We need at least one -MT equivalent for the generator of make dependency
226     // files to work.
227     if (Opts->Targets.empty())
228       Opts->Targets = {
229           deduceDepTarget(ScanInstance.getFrontendOpts().OutputFile,
230                           ScanInstance.getFrontendOpts().Inputs)};
231     Opts->IncludeSystemHeaders = true;
232 
233     switch (Format) {
234     case ScanningOutputFormat::Make:
235       ScanInstance.addDependencyCollector(
236           std::make_shared<DependencyConsumerForwarder>(
237               std::move(Opts), WorkingDirectory, Consumer));
238       break;
239     case ScanningOutputFormat::P1689:
240     case ScanningOutputFormat::Full:
241       MDC = std::make_shared<ModuleDepCollector>(
242           std::move(Opts), ScanInstance, Consumer, Controller,
243           OriginalInvocation, OptimizeArgs, EagerLoadModules,
244           Format == ScanningOutputFormat::P1689);
245       ScanInstance.addDependencyCollector(MDC);
246       break;
247     }
248 
249     // Consider different header search and diagnostic options to create
250     // different modules. This avoids the unsound aliasing of module PCMs.
251     //
252     // TODO: Implement diagnostic bucketing to reduce the impact of strict
253     // context hashing.
254     ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
255 
256     // Avoid some checks and module map parsing when loading PCM files.
257     ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false;
258 
259     std::unique_ptr<FrontendAction> Action;
260 
261     if (ModuleName)
262       Action = std::make_unique<GetDependenciesByModuleNameAction>(*ModuleName);
263     else
264       Action = std::make_unique<ReadPCHAndPreprocessAction>();
265 
266     const bool Result = ScanInstance.ExecuteAction(*Action);
267 
268     if (Result)
269       setLastCC1Arguments(std::move(OriginalInvocation));
270 
271     return Result;
272   }
273 
274   bool hasScanned() const { return Scanned; }
275 
276   /// Take the cc1 arguments corresponding to the most recent invocation used
277   /// with this action. Any modifications implied by the discovered dependencies
278   /// will have already been applied.
279   std::vector<std::string> takeLastCC1Arguments() {
280     std::vector<std::string> Result;
281     std::swap(Result, LastCC1Arguments); // Reset LastCC1Arguments to empty.
282     return Result;
283   }
284 
285 private:
286   void setLastCC1Arguments(CompilerInvocation &&CI) {
287     if (MDC)
288       MDC->applyDiscoveredDependencies(CI);
289     LastCC1Arguments = CI.getCC1CommandLine();
290   }
291 
292 private:
293   StringRef WorkingDirectory;
294   DependencyConsumer &Consumer;
295   DependencyActionController &Controller;
296   llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
297   ScanningOutputFormat Format;
298   bool OptimizeArgs;
299   bool EagerLoadModules;
300   bool DisableFree;
301   std::optional<StringRef> ModuleName;
302   std::optional<CompilerInstance> ScanInstanceStorage;
303   std::shared_ptr<ModuleDepCollector> MDC;
304   std::vector<std::string> LastCC1Arguments;
305   bool Scanned = false;
306 };
307 
308 } // end anonymous namespace
309 
310 DependencyScanningWorker::DependencyScanningWorker(
311     DependencyScanningService &Service,
312     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
313     : Format(Service.getFormat()), OptimizeArgs(Service.canOptimizeArgs()),
314       EagerLoadModules(Service.shouldEagerLoadModules()) {
315   PCHContainerOps = std::make_shared<PCHContainerOperations>();
316   // We need to read object files from PCH built outside the scanner.
317   PCHContainerOps->registerReader(
318       std::make_unique<ObjectFilePCHContainerReader>());
319   // The scanner itself writes only raw ast files.
320   PCHContainerOps->registerWriter(std::make_unique<RawPCHContainerWriter>());
321 
322   switch (Service.getMode()) {
323   case ScanningMode::DependencyDirectivesScan:
324     DepFS =
325         new DependencyScanningWorkerFilesystem(Service.getSharedCache(), FS);
326     BaseFS = DepFS;
327     break;
328   case ScanningMode::CanonicalPreprocessing:
329     DepFS = nullptr;
330     BaseFS = FS;
331     break;
332   }
333 }
334 
335 llvm::Error DependencyScanningWorker::computeDependencies(
336     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
337     DependencyConsumer &Consumer, DependencyActionController &Controller,
338     std::optional<StringRef> ModuleName) {
339   std::vector<const char *> CLI;
340   for (const std::string &Arg : CommandLine)
341     CLI.push_back(Arg.c_str());
342   auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
343   sanitizeDiagOpts(*DiagOpts);
344 
345   // Capture the emitted diagnostics and report them to the client
346   // in the case of a failure.
347   std::string DiagnosticOutput;
348   llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
349   TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.release());
350 
351   if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
352                           DiagPrinter, ModuleName))
353     return llvm::Error::success();
354   return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
355                                              llvm::inconvertibleErrorCode());
356 }
357 
358 static bool forEachDriverJob(
359     ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, FileManager &FM,
360     llvm::function_ref<bool(const driver::Command &Cmd)> Callback) {
361   SmallVector<const char *, 256> Argv;
362   Argv.reserve(ArgStrs.size());
363   for (const std::string &Arg : ArgStrs)
364     Argv.push_back(Arg.c_str());
365 
366   llvm::vfs::FileSystem *FS = &FM.getVirtualFileSystem();
367 
368   std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>(
369       Argv[0], llvm::sys::getDefaultTargetTriple(), Diags,
370       "clang LLVM compiler", FS);
371   Driver->setTitle("clang_based_tool");
372 
373   llvm::BumpPtrAllocator Alloc;
374   bool CLMode = driver::IsClangCL(
375       driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1)));
376 
377   if (llvm::Error E = driver::expandResponseFiles(Argv, CLMode, Alloc, FS)) {
378     Diags.Report(diag::err_drv_expand_response_file)
379         << llvm::toString(std::move(E));
380     return false;
381   }
382 
383   const std::unique_ptr<driver::Compilation> Compilation(
384       Driver->BuildCompilation(llvm::ArrayRef(Argv)));
385   if (!Compilation)
386     return false;
387 
388   for (const driver::Command &Job : Compilation->getJobs()) {
389     if (!Callback(Job))
390       return false;
391   }
392   return true;
393 }
394 
395 bool DependencyScanningWorker::computeDependencies(
396     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
397     DependencyConsumer &Consumer, DependencyActionController &Controller,
398     DiagnosticConsumer &DC, std::optional<StringRef> ModuleName) {
399   // Reset what might have been modified in the previous worker invocation.
400   BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
401 
402   std::optional<std::vector<std::string>> ModifiedCommandLine;
403   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
404 
405   // If we're scanning based on a module name alone, we don't expect the client
406   // to provide us with an input file. However, the driver really wants to have
407   // one. Let's just make it up to make the driver happy.
408   if (ModuleName) {
409     auto OverlayFS =
410         llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
411     auto InMemoryFS =
412         llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
413     InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
414     OverlayFS->pushOverlay(InMemoryFS);
415     ModifiedFS = OverlayFS;
416 
417     SmallString<128> FakeInputPath;
418     // TODO: We should retry the creation if the path already exists.
419     llvm::sys::fs::createUniquePath(*ModuleName + "-%%%%%%%%.input",
420                                     FakeInputPath,
421                                     /*MakeAbsolute=*/false);
422     InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
423 
424     ModifiedCommandLine = CommandLine;
425     ModifiedCommandLine->emplace_back(FakeInputPath);
426   }
427 
428   const std::vector<std::string> &FinalCommandLine =
429       ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
430   auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
431 
432   FileSystemOptions FSOpts;
433   FSOpts.WorkingDir = WorkingDirectory.str();
434   auto FileMgr = llvm::makeIntrusiveRefCnt<FileManager>(FSOpts, FinalFS);
435 
436   std::vector<const char *> FinalCCommandLine(FinalCommandLine.size(), nullptr);
437   llvm::transform(FinalCommandLine, FinalCCommandLine.begin(),
438                   [](const std::string &Str) { return Str.c_str(); });
439 
440   auto DiagOpts = CreateAndPopulateDiagOpts(FinalCCommandLine);
441   sanitizeDiagOpts(*DiagOpts);
442   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
443       CompilerInstance::createDiagnostics(DiagOpts.release(), &DC,
444                                           /*ShouldOwnClient=*/false);
445 
446   // Although `Diagnostics` are used only for command-line parsing, the
447   // custom `DiagConsumer` might expect a `SourceManager` to be present.
448   SourceManager SrcMgr(*Diags, *FileMgr);
449   Diags->setSourceManager(&SrcMgr);
450   // DisableFree is modified by Tooling for running
451   // in-process; preserve the original value, which is
452   // always true for a driver invocation.
453   bool DisableFree = true;
454   DependencyScanningAction Action(WorkingDirectory, Consumer, Controller, DepFS,
455                                   Format, OptimizeArgs, EagerLoadModules,
456                                   DisableFree, ModuleName);
457   bool Success = forEachDriverJob(
458       FinalCommandLine, *Diags, *FileMgr, [&](const driver::Command &Cmd) {
459         if (StringRef(Cmd.getCreator().getName()) != "clang") {
460           // Non-clang command. Just pass through to the dependency
461           // consumer.
462           Consumer.handleBuildCommand(
463               {Cmd.getExecutable(),
464                {Cmd.getArguments().begin(), Cmd.getArguments().end()}});
465           return true;
466         }
467 
468         std::vector<std::string> Argv;
469         Argv.push_back(Cmd.getExecutable());
470         Argv.insert(Argv.end(), Cmd.getArguments().begin(),
471                     Cmd.getArguments().end());
472 
473         // Create an invocation that uses the underlying file
474         // system to ensure that any file system requests that
475         // are made by the driver do not go through the
476         // dependency scanning filesystem.
477         ToolInvocation Invocation(std::move(Argv), &Action, &*FileMgr,
478                                   PCHContainerOps);
479         Invocation.setDiagnosticConsumer(Diags->getClient());
480         Invocation.setDiagnosticOptions(&Diags->getDiagnosticOptions());
481         if (!Invocation.run())
482           return false;
483 
484         std::vector<std::string> Args = Action.takeLastCC1Arguments();
485         Consumer.handleBuildCommand({Cmd.getExecutable(), std::move(Args)});
486         return true;
487       });
488 
489   if (Success && !Action.hasScanned())
490     Diags->Report(diag::err_fe_expected_compiler_job)
491         << llvm::join(FinalCommandLine, " ");
492   return Success && Action.hasScanned();
493 }
494 
495 DependencyActionController::~DependencyActionController() {}
496