1 //===- DependencyScanningWorker.cpp - clang-scan-deps worker --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
10 #include "clang/Basic/DiagnosticFrontend.h"
11 #include "clang/CodeGen/ObjectFilePCHContainerOperations.h"
12 #include "clang/Driver/Compilation.h"
13 #include "clang/Driver/Driver.h"
14 #include "clang/Driver/Job.h"
15 #include "clang/Driver/Tool.h"
16 #include "clang/Frontend/CompilerInstance.h"
17 #include "clang/Frontend/CompilerInvocation.h"
18 #include "clang/Frontend/FrontendActions.h"
19 #include "clang/Frontend/TextDiagnosticPrinter.h"
20 #include "clang/Frontend/Utils.h"
21 #include "clang/Lex/PreprocessorOptions.h"
22 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
23 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
24 #include "clang/Tooling/Tooling.h"
25 #include "llvm/Support/Host.h"
26 #include <optional>
27 
28 using namespace clang;
29 using namespace tooling;
30 using namespace dependencies;
31 
32 namespace {
33 
34 /// Forwards the gatherered dependencies to the consumer.
35 class DependencyConsumerForwarder : public DependencyFileGenerator {
36 public:
DependencyConsumerForwarder(std::unique_ptr<DependencyOutputOptions> Opts,StringRef WorkingDirectory,DependencyConsumer & C)37   DependencyConsumerForwarder(std::unique_ptr<DependencyOutputOptions> Opts,
38                               StringRef WorkingDirectory, DependencyConsumer &C)
39       : DependencyFileGenerator(*Opts), WorkingDirectory(WorkingDirectory),
40         Opts(std::move(Opts)), C(C) {}
41 
finishedMainFile(DiagnosticsEngine & Diags)42   void finishedMainFile(DiagnosticsEngine &Diags) override {
43     C.handleDependencyOutputOpts(*Opts);
44     llvm::SmallString<256> CanonPath;
45     for (const auto &File : getDependencies()) {
46       CanonPath = File;
47       llvm::sys::path::remove_dots(CanonPath, /*remove_dot_dot=*/true);
48       llvm::sys::fs::make_absolute(WorkingDirectory, CanonPath);
49       C.handleFileDependency(CanonPath);
50     }
51   }
52 
53 private:
54   StringRef WorkingDirectory;
55   std::unique_ptr<DependencyOutputOptions> Opts;
56   DependencyConsumer &C;
57 };
58 
/// Shorthand for the type of `HeaderSearchOptions::PrebuiltModuleFiles`; code
/// in this file inserts {module name, module file name} string pairs into it.
using PrebuiltModuleFilesT = decltype(HeaderSearchOptions::PrebuiltModuleFiles);
60 
61 /// A listener that collects the imported modules and optionally the input
62 /// files.
63 class PrebuiltModuleListener : public ASTReaderListener {
64 public:
PrebuiltModuleListener(PrebuiltModuleFilesT & PrebuiltModuleFiles,llvm::StringSet<> & InputFiles,bool VisitInputFiles,llvm::SmallVector<std::string> & NewModuleFiles)65   PrebuiltModuleListener(PrebuiltModuleFilesT &PrebuiltModuleFiles,
66                          llvm::StringSet<> &InputFiles, bool VisitInputFiles,
67                          llvm::SmallVector<std::string> &NewModuleFiles)
68       : PrebuiltModuleFiles(PrebuiltModuleFiles), InputFiles(InputFiles),
69         VisitInputFiles(VisitInputFiles), NewModuleFiles(NewModuleFiles) {}
70 
needsImportVisitation() const71   bool needsImportVisitation() const override { return true; }
needsInputFileVisitation()72   bool needsInputFileVisitation() override { return VisitInputFiles; }
needsSystemInputFileVisitation()73   bool needsSystemInputFileVisitation() override { return VisitInputFiles; }
74 
visitImport(StringRef ModuleName,StringRef Filename)75   void visitImport(StringRef ModuleName, StringRef Filename) override {
76     if (PrebuiltModuleFiles.insert({ModuleName.str(), Filename.str()}).second)
77       NewModuleFiles.push_back(Filename.str());
78   }
79 
visitInputFile(StringRef Filename,bool isSystem,bool isOverridden,bool isExplicitModule)80   bool visitInputFile(StringRef Filename, bool isSystem, bool isOverridden,
81                       bool isExplicitModule) override {
82     InputFiles.insert(Filename);
83     return true;
84   }
85 
86 private:
87   PrebuiltModuleFilesT &PrebuiltModuleFiles;
88   llvm::StringSet<> &InputFiles;
89   bool VisitInputFiles;
90   llvm::SmallVector<std::string> &NewModuleFiles;
91 };
92 
93 /// Visit the given prebuilt module and collect all of the modules it
94 /// transitively imports and contributing input files.
visitPrebuiltModule(StringRef PrebuiltModuleFilename,CompilerInstance & CI,PrebuiltModuleFilesT & ModuleFiles,llvm::StringSet<> & InputFiles,bool VisitInputFiles)95 static void visitPrebuiltModule(StringRef PrebuiltModuleFilename,
96                                 CompilerInstance &CI,
97                                 PrebuiltModuleFilesT &ModuleFiles,
98                                 llvm::StringSet<> &InputFiles,
99                                 bool VisitInputFiles) {
100   // List of module files to be processed.
101   llvm::SmallVector<std::string> Worklist{PrebuiltModuleFilename.str()};
102   PrebuiltModuleListener Listener(ModuleFiles, InputFiles, VisitInputFiles,
103                                   Worklist);
104 
105   while (!Worklist.empty())
106     ASTReader::readASTFileControlBlock(
107         Worklist.pop_back_val(), CI.getFileManager(), CI.getModuleCache(),
108         CI.getPCHContainerReader(),
109         /*FindModuleFileExtensions=*/false, Listener,
110         /*ValidateDiagnosticOptions=*/false);
111 }
112 
113 /// Transform arbitrary file name into an object-like file name.
makeObjFileName(StringRef FileName)114 static std::string makeObjFileName(StringRef FileName) {
115   SmallString<128> ObjFileName(FileName);
116   llvm::sys::path::replace_extension(ObjFileName, "o");
117   return std::string(ObjFileName.str());
118 }
119 
120 /// Deduce the dependency target based on the output file and input files.
121 static std::string
deduceDepTarget(const std::string & OutputFile,const SmallVectorImpl<FrontendInputFile> & InputFiles)122 deduceDepTarget(const std::string &OutputFile,
123                 const SmallVectorImpl<FrontendInputFile> &InputFiles) {
124   if (OutputFile != "-")
125     return OutputFile;
126 
127   if (InputFiles.empty() || !InputFiles.front().isFile())
128     return "clang-scan-deps\\ dependency";
129 
130   return makeObjFileName(InputFiles.front().getFile());
131 }
132 
133 /// Sanitize diagnostic options for dependency scan.
sanitizeDiagOpts(DiagnosticOptions & DiagOpts)134 static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
135   // Don't print 'X warnings and Y errors generated'.
136   DiagOpts.ShowCarets = false;
137   // Don't write out diagnostic file.
138   DiagOpts.DiagnosticSerializationFile.clear();
139   // Don't emit warnings as errors (and all other warnings too).
140   DiagOpts.IgnoreWarnings = true;
141 }
142 
/// A clang tool that runs the preprocessor in a mode that's optimized for
/// dependency scanning for the given compiler invocation.
///
/// The action scans at most once: the first call to runInvocation() performs
/// the scan, and every later call only recomputes the stored cc1 arguments
/// for the invocation it was given.
class DependencyScanningAction : public tooling::ToolAction {
public:
  /// Stores the scan configuration; nothing is scanned until runInvocation()
  /// is called.
  ///
  /// \param WorkingDirectory forwarded to the make-format collector.
  /// \param Consumer receives the collected dependencies.
  /// \param DepFS dependency scanning file system; when null, no
  ///        dependency-directive callback is installed and prebuilt-module
  ///        input files are not visited.
  /// \param Format selects which dependency collector is installed.
  /// \param OptimizeArgs forwarded to ModuleDepCollector.
  /// \param EagerLoadModules forwarded to ModuleDepCollector.
  /// \param DisableFree value restored into the copied invocation's frontend
  ///        options.
  /// \param ModuleName when set, dependencies are gathered for this module
  ///        name instead of preprocessing the input.
  DependencyScanningAction(
      StringRef WorkingDirectory, DependencyConsumer &Consumer,
      llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
      ScanningOutputFormat Format, bool OptimizeArgs, bool EagerLoadModules,
      bool DisableFree, std::optional<StringRef> ModuleName = std::nullopt)
      : WorkingDirectory(WorkingDirectory), Consumer(Consumer),
        DepFS(std::move(DepFS)), Format(Format), OptimizeArgs(OptimizeArgs),
        EagerLoadModules(EagerLoadModules), DisableFree(DisableFree),
        ModuleName(ModuleName) {}

  /// Runs the scan for \p Invocation, or, if a scan has already been run by
  /// this action, only refreshes the stored cc1 arguments.
  ///
  /// \returns true when the arguments were refreshed for an already-scanned
  ///          action or when the scanning frontend action succeeded; false
  ///          when no diagnostics engine could be created or the frontend
  ///          action failed.
  bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
                     FileManager *FileMgr,
                     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
                     DiagnosticConsumer *DiagConsumer) override {
    // Make a deep copy of the original Clang invocation.
    CompilerInvocation OriginalInvocation(*Invocation);
    // Restore the value of DisableFree, which may be modified by Tooling.
    OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;

    if (Scanned) {
      // Scanning runs once for the first -cc1 invocation in a chain of driver
      // jobs. For any dependent jobs, reuse the scanning result and just
      // update the LastCC1Arguments to correspond to the new invocation.
      // FIXME: to support multi-arch builds, each arch requires a separate scan
      setLastCC1Arguments(std::move(OriginalInvocation));
      return true;
    }

    // Set before the scan is attempted, so a failed scan still counts as
    // "scanned" for subsequent invocations.
    Scanned = true;

    // Create a compiler instance to handle the actual work.
    ScanInstanceStorage.emplace(std::move(PCHContainerOps));
    CompilerInstance &ScanInstance = *ScanInstanceStorage;
    ScanInstance.setInvocation(std::move(Invocation));

    // Create the compiler's actual diagnostics engine.
    sanitizeDiagOpts(ScanInstance.getDiagnosticOpts());
    ScanInstance.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false);
    if (!ScanInstance.hasDiagnostics())
      return false;

    ScanInstance.getPreprocessorOpts().AllowPCHWithDifferentModulesCachePath =
        true;

    // The global module index is neither generated nor used during the scan,
    // and modules do not share the file manager.
    ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
    ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
    ScanInstance.getFrontendOpts().ModulesShareFileManager = false;

    ScanInstance.setFileManager(FileMgr);
    // Support for virtual file system overlays.
    FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
        ScanInstance.getInvocation(), ScanInstance.getDiagnostics(),
        FileMgr->getVirtualFileSystemPtr()));

    ScanInstance.createSourceManager(*FileMgr);

    // NOTE(review): this set is filled by visitPrebuiltModule() but is not
    // read again within this function.
    llvm::StringSet<> PrebuiltModulesInputFiles;
    // Store the list of prebuilt module files into header search options. This
    // will prevent the implicit build to create duplicate modules and will
    // force reuse of the existing prebuilt module files instead.
    if (!ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
      visitPrebuiltModule(
          ScanInstance.getPreprocessorOpts().ImplicitPCHInclude, ScanInstance,
          ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles,
          PrebuiltModulesInputFiles, /*VisitInputFiles=*/DepFS != nullptr);

    // Use the dependency scanning optimized file system if requested to do so.
    if (DepFS) {
      // The callback holds its own reference to the file system, so it does
      // not depend on this action object.
      llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> LocalDepFS =
          DepFS;
      ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
          [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File)
          -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
        if (llvm::ErrorOr<EntryRef> Entry =
                LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
          return Entry->getDirectiveTokens();
        // No entry could be obtained for the file: report "no directives".
        return std::nullopt;
      };
    }

    // Create the dependency collector that will collect the produced
    // dependencies.
    //
    // This also moves the existing dependency output options from the
    // invocation to the collector. The options in the invocation are reset,
    // which ensures that the compiler won't create new dependency collectors,
    // and thus won't write out the extra '.d' files to disk.
    auto Opts = std::make_unique<DependencyOutputOptions>();
    std::swap(*Opts, ScanInstance.getInvocation().getDependencyOutputOpts());
    // We need at least one -MT equivalent for the generator of make dependency
    // files to work.
    if (Opts->Targets.empty())
      Opts->Targets = {
          deduceDepTarget(ScanInstance.getFrontendOpts().OutputFile,
                          ScanInstance.getFrontendOpts().Inputs)};
    Opts->IncludeSystemHeaders = true;

    switch (Format) {
    case ScanningOutputFormat::Make:
      ScanInstance.addDependencyCollector(
          std::make_shared<DependencyConsumerForwarder>(
              std::move(Opts), WorkingDirectory, Consumer));
      break;
    case ScanningOutputFormat::P1689:
    case ScanningOutputFormat::Full:
      // The collector is kept in MDC so that setLastCC1Arguments() can apply
      // the discovered dependencies later.
      MDC = std::make_shared<ModuleDepCollector>(
          std::move(Opts), ScanInstance, Consumer, OriginalInvocation,
          OptimizeArgs, EagerLoadModules,
          Format == ScanningOutputFormat::P1689);
      ScanInstance.addDependencyCollector(MDC);
      break;
    }

    // Consider different header search and diagnostic options to create
    // different modules. This avoids the unsound aliasing of module PCMs.
    //
    // TODO: Implement diagnostic bucketing to reduce the impact of strict
    // context hashing.
    ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;

    std::unique_ptr<FrontendAction> Action;

    // A by-module-name query uses a dedicated action; otherwise the PCH is
    // read and the input is preprocessed.
    if (ModuleName)
      Action = std::make_unique<GetDependenciesByModuleNameAction>(*ModuleName);
    else
      Action = std::make_unique<ReadPCHAndPreprocessAction>();

    const bool Result = ScanInstance.ExecuteAction(*Action);

    // The cc1 arguments are only recorded for a successful scan.
    if (Result)
      setLastCC1Arguments(std::move(OriginalInvocation));

    return Result;
  }

  /// Whether runInvocation() has already started a scan.
  bool hasScanned() const { return Scanned; }

  /// Take the cc1 arguments corresponding to the most recent invocation used
  /// with this action. Any modifications implied by the discovered dependencies
  /// will have already been applied.
  std::vector<std::string> takeLastCC1Arguments() {
    std::vector<std::string> Result;
    std::swap(Result, LastCC1Arguments); // Reset LastCC1Arguments to empty.
    return Result;
  }

private:
  /// Records the cc1 command line of \p CI, first letting the module
  /// dependency collector (if one was created) apply what it discovered.
  void setLastCC1Arguments(CompilerInvocation &&CI) {
    if (MDC)
      MDC->applyDiscoveredDependencies(CI);
    LastCC1Arguments = CI.getCC1CommandLine();
  }

private:
  // Non-owning view of the working directory string passed to the
  // constructor.
  StringRef WorkingDirectory;
  DependencyConsumer &Consumer;
  // May be null; see the constructor documentation.
  llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
  ScanningOutputFormat Format;
  bool OptimizeArgs;
  bool EagerLoadModules;
  bool DisableFree;
  std::optional<StringRef> ModuleName;
  // Holds the compiler instance used for the scan as a member rather than a
  // local. NOTE(review): presumably so that it stays alive while MDC, which
  // is constructed with a reference to it, is still in use -- confirm.
  std::optional<CompilerInstance> ScanInstanceStorage;
  // Only set for the P1689 and Full output formats.
  std::shared_ptr<ModuleDepCollector> MDC;
  std::vector<std::string> LastCC1Arguments;
  // Becomes true on the first call to runInvocation().
  bool Scanned = false;
};
314 
315 } // end anonymous namespace
316 
DependencyScanningWorker(DependencyScanningService & Service,llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)317 DependencyScanningWorker::DependencyScanningWorker(
318     DependencyScanningService &Service,
319     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
320     : Format(Service.getFormat()), OptimizeArgs(Service.canOptimizeArgs()),
321       EagerLoadModules(Service.shouldEagerLoadModules()) {
322   PCHContainerOps = std::make_shared<PCHContainerOperations>();
323   PCHContainerOps->registerReader(
324       std::make_unique<ObjectFilePCHContainerReader>());
325   // We don't need to write object files, but the current PCH implementation
326   // requires the writer to be registered as well.
327   PCHContainerOps->registerWriter(
328       std::make_unique<ObjectFilePCHContainerWriter>());
329 
330   switch (Service.getMode()) {
331   case ScanningMode::DependencyDirectivesScan:
332     DepFS =
333         new DependencyScanningWorkerFilesystem(Service.getSharedCache(), FS);
334     BaseFS = DepFS;
335     break;
336   case ScanningMode::CanonicalPreprocessing:
337     DepFS = nullptr;
338     BaseFS = FS;
339     break;
340   }
341 }
342 
computeDependencies(StringRef WorkingDirectory,const std::vector<std::string> & CommandLine,DependencyConsumer & Consumer,std::optional<StringRef> ModuleName)343 llvm::Error DependencyScanningWorker::computeDependencies(
344     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
345     DependencyConsumer &Consumer, std::optional<StringRef> ModuleName) {
346   std::vector<const char *> CLI;
347   for (const std::string &Arg : CommandLine)
348     CLI.push_back(Arg.c_str());
349   auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
350   sanitizeDiagOpts(*DiagOpts);
351 
352   // Capture the emitted diagnostics and report them to the client
353   // in the case of a failure.
354   std::string DiagnosticOutput;
355   llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
356   TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.release());
357 
358   if (computeDependencies(WorkingDirectory, CommandLine, Consumer, DiagPrinter,
359                           ModuleName))
360     return llvm::Error::success();
361   return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
362                                              llvm::inconvertibleErrorCode());
363 }
364 
forEachDriverJob(ArrayRef<std::string> Args,DiagnosticsEngine & Diags,FileManager & FM,llvm::function_ref<bool (const driver::Command & Cmd)> Callback)365 static bool forEachDriverJob(
366     ArrayRef<std::string> Args, DiagnosticsEngine &Diags, FileManager &FM,
367     llvm::function_ref<bool(const driver::Command &Cmd)> Callback) {
368   std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>(
369       Args[0], llvm::sys::getDefaultTargetTriple(), Diags,
370       "clang LLVM compiler", &FM.getVirtualFileSystem());
371   Driver->setTitle("clang_based_tool");
372 
373   std::vector<const char *> Argv;
374   for (const std::string &Arg : Args)
375     Argv.push_back(Arg.c_str());
376 
377   const std::unique_ptr<driver::Compilation> Compilation(
378       Driver->BuildCompilation(llvm::ArrayRef(Argv)));
379   if (!Compilation)
380     return false;
381 
382   for (const driver::Command &Job : Compilation->getJobs()) {
383     if (!Callback(Job))
384       return false;
385   }
386   return true;
387 }
388 
computeDependencies(StringRef WorkingDirectory,const std::vector<std::string> & CommandLine,DependencyConsumer & Consumer,DiagnosticConsumer & DC,std::optional<StringRef> ModuleName)389 bool DependencyScanningWorker::computeDependencies(
390     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
391     DependencyConsumer &Consumer, DiagnosticConsumer &DC,
392     std::optional<StringRef> ModuleName) {
393   // Reset what might have been modified in the previous worker invocation.
394   BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
395 
396   std::optional<std::vector<std::string>> ModifiedCommandLine;
397   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
398   if (ModuleName) {
399     ModifiedCommandLine = CommandLine;
400     ModifiedCommandLine->emplace_back(*ModuleName);
401 
402     auto OverlayFS =
403         llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
404     auto InMemoryFS =
405         llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
406     InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
407     InMemoryFS->addFile(*ModuleName, 0, llvm::MemoryBuffer::getMemBuffer(""));
408     OverlayFS->pushOverlay(InMemoryFS);
409     ModifiedFS = OverlayFS;
410   }
411 
412   const std::vector<std::string> &FinalCommandLine =
413       ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
414 
415   FileSystemOptions FSOpts;
416   FSOpts.WorkingDir = WorkingDirectory.str();
417   auto FileMgr = llvm::makeIntrusiveRefCnt<FileManager>(
418       FSOpts, ModifiedFS ? ModifiedFS : BaseFS);
419 
420   std::vector<const char *> FinalCCommandLine(CommandLine.size(), nullptr);
421   llvm::transform(CommandLine, FinalCCommandLine.begin(),
422                   [](const std::string &Str) { return Str.c_str(); });
423 
424   auto DiagOpts = CreateAndPopulateDiagOpts(FinalCCommandLine);
425   sanitizeDiagOpts(*DiagOpts);
426   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
427       CompilerInstance::createDiagnostics(DiagOpts.release(), &DC,
428                                           /*ShouldOwnClient=*/false);
429 
430   // Although `Diagnostics` are used only for command-line parsing, the
431   // custom `DiagConsumer` might expect a `SourceManager` to be present.
432   SourceManager SrcMgr(*Diags, *FileMgr);
433   Diags->setSourceManager(&SrcMgr);
434   // DisableFree is modified by Tooling for running
435   // in-process; preserve the original value, which is
436   // always true for a driver invocation.
437   bool DisableFree = true;
438   DependencyScanningAction Action(WorkingDirectory, Consumer, DepFS, Format,
439                                   OptimizeArgs, EagerLoadModules, DisableFree,
440                                   ModuleName);
441   bool Success = forEachDriverJob(
442       FinalCommandLine, *Diags, *FileMgr, [&](const driver::Command &Cmd) {
443         if (StringRef(Cmd.getCreator().getName()) != "clang") {
444           // Non-clang command. Just pass through to the dependency
445           // consumer.
446           Consumer.handleBuildCommand(
447               {Cmd.getExecutable(),
448                {Cmd.getArguments().begin(), Cmd.getArguments().end()}});
449           return true;
450         }
451 
452         std::vector<std::string> Argv;
453         Argv.push_back(Cmd.getExecutable());
454         Argv.insert(Argv.end(), Cmd.getArguments().begin(),
455                     Cmd.getArguments().end());
456 
457         // Create an invocation that uses the underlying file
458         // system to ensure that any file system requests that
459         // are made by the driver do not go through the
460         // dependency scanning filesystem.
461         ToolInvocation Invocation(std::move(Argv), &Action, &*FileMgr,
462                                   PCHContainerOps);
463         Invocation.setDiagnosticConsumer(Diags->getClient());
464         Invocation.setDiagnosticOptions(&Diags->getDiagnosticOptions());
465         if (!Invocation.run())
466           return false;
467 
468         std::vector<std::string> Args = Action.takeLastCC1Arguments();
469         Consumer.handleBuildCommand({Cmd.getExecutable(), std::move(Args)});
470         return true;
471       });
472 
473   if (Success && !Action.hasScanned())
474     Diags->Report(diag::err_fe_expected_compiler_job)
475         << llvm::join(FinalCommandLine, " ");
476   return Success && Action.hasScanned();
477 }
478