1 //===- CompilationDatabase.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file contains implementations of the CompilationDatabase base class
10 //  and the FixedCompilationDatabase.
11 //
12 //  FIXME: Various functions that take a string &ErrorMessage should be upgraded
13 //  to Expected.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "clang/Tooling/CompilationDatabase.h"
18 #include "clang/Basic/Diagnostic.h"
19 #include "clang/Basic/DiagnosticIDs.h"
20 #include "clang/Basic/DiagnosticOptions.h"
21 #include "clang/Basic/LLVM.h"
22 #include "clang/Driver/Action.h"
23 #include "clang/Driver/Compilation.h"
24 #include "clang/Driver/Driver.h"
25 #include "clang/Driver/DriverDiagnostic.h"
26 #include "clang/Driver/Job.h"
27 #include "clang/Frontend/TextDiagnosticPrinter.h"
28 #include "clang/Tooling/CompilationDatabasePluginRegistry.h"
29 #include "clang/Tooling/Tooling.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallString.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Option/Arg.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/Compiler.h"
39 #include "llvm/Support/ErrorOr.h"
40 #include "llvm/Support/Host.h"
41 #include "llvm/Support/LineIterator.h"
42 #include "llvm/Support/MemoryBuffer.h"
43 #include "llvm/Support/Path.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include <algorithm>
46 #include <cassert>
47 #include <cstring>
48 #include <iterator>
49 #include <memory>
50 #include <sstream>
51 #include <string>
52 #include <system_error>
53 #include <utility>
54 #include <vector>
55 
56 using namespace clang;
57 using namespace tooling;
58 
// Instantiate the registry that holds the compilation database plugins.
// NOTE(review): presumably this emits the registry's out-of-line static state
// in this translation unit -- confirm against the macro's definition.
LLVM_INSTANTIATE_REGISTRY(CompilationDatabasePluginRegistry)
60 
// Out-of-line defaulted destructor for the CompilationDatabase base class.
CompilationDatabase::~CompilationDatabase() = default;
62 
63 std::unique_ptr<CompilationDatabase>
64 CompilationDatabase::loadFromDirectory(StringRef BuildDirectory,
65                                        std::string &ErrorMessage) {
66   llvm::raw_string_ostream ErrorStream(ErrorMessage);
67   for (CompilationDatabasePluginRegistry::iterator
68        It = CompilationDatabasePluginRegistry::begin(),
69        Ie = CompilationDatabasePluginRegistry::end();
70        It != Ie; ++It) {
71     std::string DatabaseErrorMessage;
72     std::unique_ptr<CompilationDatabasePlugin> Plugin(It->instantiate());
73     if (std::unique_ptr<CompilationDatabase> DB =
74             Plugin->loadFromDirectory(BuildDirectory, DatabaseErrorMessage))
75       return DB;
76     ErrorStream << It->getName() << ": " << DatabaseErrorMessage << "\n";
77   }
78   return nullptr;
79 }
80 
81 static std::unique_ptr<CompilationDatabase>
82 findCompilationDatabaseFromDirectory(StringRef Directory,
83                                      std::string &ErrorMessage) {
84   std::stringstream ErrorStream;
85   bool HasErrorMessage = false;
86   while (!Directory.empty()) {
87     std::string LoadErrorMessage;
88 
89     if (std::unique_ptr<CompilationDatabase> DB =
90             CompilationDatabase::loadFromDirectory(Directory, LoadErrorMessage))
91       return DB;
92 
93     if (!HasErrorMessage) {
94       ErrorStream << "No compilation database found in " << Directory.str()
95                   << " or any parent directory\n" << LoadErrorMessage;
96       HasErrorMessage = true;
97     }
98 
99     Directory = llvm::sys::path::parent_path(Directory);
100   }
101   ErrorMessage = ErrorStream.str();
102   return nullptr;
103 }
104 
105 std::unique_ptr<CompilationDatabase>
106 CompilationDatabase::autoDetectFromSource(StringRef SourceFile,
107                                           std::string &ErrorMessage) {
108   SmallString<1024> AbsolutePath(getAbsolutePath(SourceFile));
109   StringRef Directory = llvm::sys::path::parent_path(AbsolutePath);
110 
111   std::unique_ptr<CompilationDatabase> DB =
112       findCompilationDatabaseFromDirectory(Directory, ErrorMessage);
113 
114   if (!DB)
115     ErrorMessage = ("Could not auto-detect compilation database for file \"" +
116                    SourceFile + "\"\n" + ErrorMessage).str();
117   return DB;
118 }
119 
120 std::unique_ptr<CompilationDatabase>
121 CompilationDatabase::autoDetectFromDirectory(StringRef SourceDir,
122                                              std::string &ErrorMessage) {
123   SmallString<1024> AbsolutePath(getAbsolutePath(SourceDir));
124 
125   std::unique_ptr<CompilationDatabase> DB =
126       findCompilationDatabaseFromDirectory(AbsolutePath, ErrorMessage);
127 
128   if (!DB)
129     ErrorMessage = ("Could not auto-detect compilation database from directory \"" +
130                    SourceDir + "\"\n" + ErrorMessage).str();
131   return DB;
132 }
133 
134 std::vector<CompileCommand> CompilationDatabase::getAllCompileCommands() const {
135   std::vector<CompileCommand> Result;
136   for (const auto &File : getAllFiles()) {
137     auto C = getCompileCommands(File);
138     std::move(C.begin(), C.end(), std::back_inserter(Result));
139   }
140   return Result;
141 }
142 
// Out-of-line defaulted destructor for the CompilationDatabasePlugin interface.
CompilationDatabasePlugin::~CompilationDatabasePlugin() = default;
144 
145 namespace {
146 
147 // Helper for recursively searching through a chain of actions and collecting
148 // all inputs, direct and indirect, of compile jobs.
149 struct CompileJobAnalyzer {
150   SmallVector<std::string, 2> Inputs;
151 
152   void run(const driver::Action *A) {
153     runImpl(A, false);
154   }
155 
156 private:
157   void runImpl(const driver::Action *A, bool Collect) {
158     bool CollectChildren = Collect;
159     switch (A->getKind()) {
160     case driver::Action::CompileJobClass:
161       CollectChildren = true;
162       break;
163 
164     case driver::Action::InputClass:
165       if (Collect) {
166         const auto *IA = cast<driver::InputAction>(A);
167         Inputs.push_back(IA->getInputArg().getSpelling());
168       }
169       break;
170 
171     default:
172       // Don't care about others
173       break;
174     }
175 
176     for (const driver::Action *AI : A->inputs())
177       runImpl(AI, CollectChildren);
178   }
179 };
180 
181 // Special DiagnosticConsumer that looks for warn_drv_input_file_unused
182 // diagnostics from the driver and collects the option strings for those unused
183 // options.
184 class UnusedInputDiagConsumer : public DiagnosticConsumer {
185 public:
186   UnusedInputDiagConsumer(DiagnosticConsumer &Other) : Other(Other) {}
187 
188   void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
189                         const Diagnostic &Info) override {
190     if (Info.getID() == diag::warn_drv_input_file_unused) {
191       // Arg 1 for this diagnostic is the option that didn't get used.
192       UnusedInputs.push_back(Info.getArgStdStr(0));
193     } else if (DiagLevel >= DiagnosticsEngine::Error) {
194       // If driver failed to create compilation object, show the diagnostics
195       // to user.
196       Other.HandleDiagnostic(DiagLevel, Info);
197     }
198   }
199 
200   DiagnosticConsumer &Other;
201   SmallVector<std::string, 2> UnusedInputs;
202 };
203 
204 // Unary functor for asking "Given a StringRef S1, does there exist a string
205 // S2 in Arr where S1 == S2?"
206 struct MatchesAny {
207   MatchesAny(ArrayRef<std::string> Arr) : Arr(Arr) {}
208 
209   bool operator() (StringRef S) {
210     for (const std::string *I = Arr.begin(), *E = Arr.end(); I != E; ++I)
211       if (*I == S)
212         return true;
213     return false;
214   }
215 
216 private:
217   ArrayRef<std::string> Arr;
218 };
219 
220 // Filter of tools unused flags such as -no-integrated-as and -Wa,*.
221 // They are not used for syntax checking, and could confuse targets
222 // which don't support these options.
223 struct FilterUnusedFlags {
224   bool operator() (StringRef S) {
225     return (S == "-no-integrated-as") || S.startswith("-Wa,");
226   }
227 };
228 
229 std::string GetClangToolCommand() {
230   static int Dummy;
231   std::string ClangExecutable =
232       llvm::sys::fs::getMainExecutable("clang", (void *)&Dummy);
233   SmallString<128> ClangToolPath;
234   ClangToolPath = llvm::sys::path::parent_path(ClangExecutable);
235   llvm::sys::path::append(ClangToolPath, "clang-tool");
236   return ClangToolPath.str();
237 }
238 
239 } // namespace
240 
241 /// Strips any positional args and possible argv[0] from a command-line
242 /// provided by the user to construct a FixedCompilationDatabase.
243 ///
244 /// FixedCompilationDatabase requires a command line to be in this format as it
245 /// constructs the command line for each file by appending the name of the file
246 /// to be compiled. FixedCompilationDatabase also adds its own argv[0] to the
247 /// start of the command line although its value is not important as it's just
248 /// ignored by the Driver invoked by the ClangTool using the
249 /// FixedCompilationDatabase.
250 ///
251 /// FIXME: This functionality should probably be made available by
252 /// clang::driver::Driver although what the interface should look like is not
253 /// clear.
254 ///
255 /// \param[in] Args Args as provided by the user.
256 /// \return Resulting stripped command line.
257 ///          \li true if successful.
258 ///          \li false if \c Args cannot be used for compilation jobs (e.g.
259 ///          contains an option like -E or -version).
260 static bool stripPositionalArgs(std::vector<const char *> Args,
261                                 std::vector<std::string> &Result,
262                                 std::string &ErrorMsg) {
263   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
264   llvm::raw_string_ostream Output(ErrorMsg);
265   TextDiagnosticPrinter DiagnosticPrinter(Output, &*DiagOpts);
266   UnusedInputDiagConsumer DiagClient(DiagnosticPrinter);
267   DiagnosticsEngine Diagnostics(
268       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()),
269       &*DiagOpts, &DiagClient, false);
270 
271   // The clang executable path isn't required since the jobs the driver builds
272   // will not be executed.
273   std::unique_ptr<driver::Driver> NewDriver(new driver::Driver(
274       /* ClangExecutable= */ "", llvm::sys::getDefaultTargetTriple(),
275       Diagnostics));
276   NewDriver->setCheckInputsExist(false);
277 
278   // This becomes the new argv[0]. The value is used to detect libc++ include
279   // dirs on Mac, it isn't used for other platforms.
280   std::string Argv0 = GetClangToolCommand();
281   Args.insert(Args.begin(), Argv0.c_str());
282 
283   // By adding -c, we force the driver to treat compilation as the last phase.
284   // It will then issue warnings via Diagnostics about un-used options that
285   // would have been used for linking. If the user provided a compiler name as
286   // the original argv[0], this will be treated as a linker input thanks to
287   // insertng a new argv[0] above. All un-used options get collected by
288   // UnusedInputdiagConsumer and get stripped out later.
289   Args.push_back("-c");
290 
291   // Put a dummy C++ file on to ensure there's at least one compile job for the
292   // driver to construct. If the user specified some other argument that
293   // prevents compilation, e.g. -E or something like -version, we may still end
294   // up with no jobs but then this is the user's fault.
295   Args.push_back("placeholder.cpp");
296 
297   Args.erase(std::remove_if(Args.begin(), Args.end(), FilterUnusedFlags()),
298              Args.end());
299 
300   const std::unique_ptr<driver::Compilation> Compilation(
301       NewDriver->BuildCompilation(Args));
302   if (!Compilation)
303     return false;
304 
305   const driver::JobList &Jobs = Compilation->getJobs();
306 
307   CompileJobAnalyzer CompileAnalyzer;
308 
309   for (const auto &Cmd : Jobs) {
310     // Collect only for Assemble, Backend, and Compile jobs. If we do all jobs
311     // we get duplicates since Link jobs point to Assemble jobs as inputs.
312     // -flto* flags make the BackendJobClass, which still needs analyzer.
313     if (Cmd.getSource().getKind() == driver::Action::AssembleJobClass ||
314         Cmd.getSource().getKind() == driver::Action::BackendJobClass ||
315         Cmd.getSource().getKind() == driver::Action::CompileJobClass) {
316       CompileAnalyzer.run(&Cmd.getSource());
317     }
318   }
319 
320   if (CompileAnalyzer.Inputs.empty()) {
321     ErrorMsg = "warning: no compile jobs found\n";
322     return false;
323   }
324 
325   // Remove all compilation input files from the command line. This is
326   // necessary so that getCompileCommands() can construct a command line for
327   // each file.
328   std::vector<const char *>::iterator End = std::remove_if(
329       Args.begin(), Args.end(), MatchesAny(CompileAnalyzer.Inputs));
330 
331   // Remove all inputs deemed unused for compilation.
332   End = std::remove_if(Args.begin(), End, MatchesAny(DiagClient.UnusedInputs));
333 
334   // Remove the -c add above as well. It will be at the end right now.
335   assert(strcmp(*(End - 1), "-c") == 0);
336   --End;
337 
338   Result = std::vector<std::string>(Args.begin() + 1, End);
339   return true;
340 }
341 
342 std::unique_ptr<FixedCompilationDatabase>
343 FixedCompilationDatabase::loadFromCommandLine(int &Argc,
344                                               const char *const *Argv,
345                                               std::string &ErrorMsg,
346                                               Twine Directory) {
347   ErrorMsg.clear();
348   if (Argc == 0)
349     return nullptr;
350   const char *const *DoubleDash = std::find(Argv, Argv + Argc, StringRef("--"));
351   if (DoubleDash == Argv + Argc)
352     return nullptr;
353   std::vector<const char *> CommandLine(DoubleDash + 1, Argv + Argc);
354   Argc = DoubleDash - Argv;
355 
356   std::vector<std::string> StrippedArgs;
357   if (!stripPositionalArgs(CommandLine, StrippedArgs, ErrorMsg))
358     return nullptr;
359   return std::make_unique<FixedCompilationDatabase>(Directory, StrippedArgs);
360 }
361 
362 std::unique_ptr<FixedCompilationDatabase>
363 FixedCompilationDatabase::loadFromFile(StringRef Path, std::string &ErrorMsg) {
364   ErrorMsg.clear();
365   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
366       llvm::MemoryBuffer::getFile(Path);
367   if (std::error_code Result = File.getError()) {
368     ErrorMsg = "Error while opening fixed database: " + Result.message();
369     return nullptr;
370   }
371   std::vector<std::string> Args{llvm::line_iterator(**File),
372                                 llvm::line_iterator()};
373   return std::make_unique<FixedCompilationDatabase>(
374       llvm::sys::path::parent_path(Path), std::move(Args));
375 }
376 
377 FixedCompilationDatabase::
378 FixedCompilationDatabase(Twine Directory, ArrayRef<std::string> CommandLine) {
379   std::vector<std::string> ToolCommandLine(1, GetClangToolCommand());
380   ToolCommandLine.insert(ToolCommandLine.end(),
381                          CommandLine.begin(), CommandLine.end());
382   CompileCommands.emplace_back(Directory, StringRef(),
383                                std::move(ToolCommandLine),
384                                StringRef());
385 }
386 
387 std::vector<CompileCommand>
388 FixedCompilationDatabase::getCompileCommands(StringRef FilePath) const {
389   std::vector<CompileCommand> Result(CompileCommands);
390   Result[0].CommandLine.push_back(FilePath);
391   Result[0].Filename = FilePath;
392   return Result;
393 }
394 
395 namespace {
396 
397 class FixedCompilationDatabasePlugin : public CompilationDatabasePlugin {
398   std::unique_ptr<CompilationDatabase>
399   loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
400     SmallString<1024> DatabasePath(Directory);
401     llvm::sys::path::append(DatabasePath, "compile_flags.txt");
402     return FixedCompilationDatabase::loadFromFile(DatabasePath, ErrorMessage);
403   }
404 };
405 
406 } // namespace
407 
// Adds FixedCompilationDatabasePlugin to the plugin registry. The first string
// is the name the entry is registered under; the second is presumably its
// human-readable description (confirm against the registry's Add interface).
static CompilationDatabasePluginRegistry::Add<FixedCompilationDatabasePlugin>
X("fixed-compilation-database", "Reads plain-text flags file");
410 
411 namespace clang {
412 namespace tooling {
413 
// This anchor is used to force the linker to link in the generated object file
// and thus register the JSONCompilationDatabasePlugin.
// JSONAnchorSource is only declared here; it is defined in another translation
// unit (presumably the one implementing the JSON plugin -- confirm). Reading
// it into JSONAnchorDest creates the reference; the destination itself is
// never used, hence LLVM_ATTRIBUTE_UNUSED.
extern volatile int JSONAnchorSource;
static int LLVM_ATTRIBUTE_UNUSED JSONAnchorDest = JSONAnchorSource;
418 
419 } // namespace tooling
420 } // namespace clang
421