1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Opts.inc"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
22 #include "llvm/DebugInfo/Symbolize/Markup.h"
23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
25 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
26 #include "llvm/Debuginfod/DIFetcher.h"
27 #include "llvm/Debuginfod/Debuginfod.h"
28 #include "llvm/Debuginfod/HTTPClient.h"
29 #include "llvm/Option/Arg.h"
30 #include "llvm/Option/ArgList.h"
31 #include "llvm/Option/Option.h"
32 #include "llvm/Support/COM.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/InitLLVM.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/StringSaver.h"
39 #include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
44 
45 using namespace llvm;
46 using namespace symbolize;
47 
48 namespace {
// Numeric identifiers for the command-line options described in Opts.inc.
// The OPTION X-macro below expands every OPTION(...) record of Opts.inc into
// one OPT_<ID> enumerator, so the enumerators follow OPT_INVALID in the order
// in which Opts.inc lists the options.
enum ID {
  OPT_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  OPT_##ID,
#include "Opts.inc"
#undef OPTION
};
57 
// Expand every PREFIX(NAME, VALUE) record of Opts.inc into a constant array of
// C strings called NAME. Presumably these are the arrays that the PREFIX
// argument of the OPTION records names -- confirm against Opts.inc.
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "Opts.inc"
#undef PREFIX
61 
// Table of option descriptions handed to opt::OptTable by SymbolizerOptTable.
// Every OPTION(...) record of Opts.inc becomes one brace-initialized
// opt::OptTable::Info entry; the macro only reorders the record's arguments
// into the order written below and turns ID, KIND, GROUP and ALIAS into the
// corresponding OPT_<ID> / opt::Option::<KIND>Class names.
const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  {                                                                            \
      PREFIX,      NAME,      HELPTEXT,                                        \
      METAVAR,     OPT_##ID,  opt::Option::KIND##Class,                        \
      PARAM,       FLAGS,     OPT_##GROUP,                                     \
      OPT_##ALIAS, ALIASARGS, VALUES},
#include "Opts.inc"
#undef OPTION
};
73 
74 class SymbolizerOptTable : public opt::OptTable {
75 public:
76   SymbolizerOptTable() : OptTable(InfoTable) {
77     setGroupedShortOptions(true);
78   }
79 };
80 } // namespace
81 
82 template <typename T>
83 static void print(const Request &Request, Expected<T> &ResOrErr,
84                   DIPrinter &Printer) {
85   if (ResOrErr) {
86     // No error, print the result.
87     Printer.print(Request, *ResOrErr);
88     return;
89   }
90 
91   // Handle the error.
92   bool PrintEmpty = true;
93   handleAllErrors(std::move(ResOrErr.takeError()),
94                   [&](const ErrorInfoBase &EI) {
95                     PrintEmpty = Printer.printError(
96                         Request, EI, "LLVMSymbolizer: error reading file: ");
97                   });
98 
99   if (PrintEmpty)
100     Printer.print(Request, T());
101 }
102 
// Output format of the tool. main() uses it to choose between LLVMPrinter,
// GNUPrinter and JSONPrinter, and executeCommand() treats GNU specially when
// inlined frames are not requested.
enum class OutputStyle { LLVM, GNU, JSON };
104 
// Kind of lookup requested by one input command. parseCommand() derives it
// from an optional "CODE ", "DATA " or "FRAME " prefix and falls back to Code
// when no prefix is present.
enum class Command {
  Code,  // Source location lookup (symbolizeCode / symbolizeInlinedCode).
  Data,  // Global variable lookup (symbolizeData).
  Frame, // Local variable lookup for a frame (symbolizeFrame).
};
110 
111 static void enableDebuginfod(LLVMSymbolizer &Symbolizer) {
112   static bool IsEnabled = false;
113   if (IsEnabled)
114     return;
115   IsEnabled = true;
116   // Look up symbols using the debuginfod client.
117   Symbolizer.addDIFetcher(std::make_unique<DebuginfodDIFetcher>());
118   // The HTTPClient must be initialized for use by the debuginfod client.
119   HTTPClient::initialize();
120 }
121 
122 static SmallVector<uint8_t> parseBuildID(StringRef Str) {
123   std::string Bytes;
124   if (!tryGetFromHex(Str, Bytes))
125     return {};
126   ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
127                             Bytes.size());
128   return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
129 }
130 
131 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
132                          StringRef InputString, Command &Cmd,
133                          std::string &ModuleName,
134                          SmallVectorImpl<uint8_t> &BuildID,
135                          uint64_t &ModuleOffset) {
136   const char kDelimiters[] = " \n\r";
137   ModuleName = "";
138   if (InputString.consume_front("CODE ")) {
139     Cmd = Command::Code;
140   } else if (InputString.consume_front("DATA ")) {
141     Cmd = Command::Data;
142   } else if (InputString.consume_front("FRAME ")) {
143     Cmd = Command::Frame;
144   } else {
145     // If no cmd, assume it's CODE.
146     Cmd = Command::Code;
147   }
148 
149   const char *Pos;
150   // Skip delimiters and parse input filename (if needed).
151   if (BinaryName.empty() && BuildID.empty()) {
152     bool HasFilePrefix = false;
153     bool HasBuildIDPrefix = false;
154     while (true) {
155       if (InputString.consume_front("FILE:")) {
156         if (HasFilePrefix)
157           return false;
158         HasFilePrefix = true;
159         continue;
160       }
161       if (InputString.consume_front("BUILDID:")) {
162         if (HasBuildIDPrefix)
163           return false;
164         HasBuildIDPrefix = true;
165         continue;
166       }
167       break;
168     }
169     if (HasFilePrefix && HasBuildIDPrefix)
170       return false;
171 
172     Pos = InputString.data();
173     Pos += strspn(Pos, kDelimiters);
174     if (*Pos == '"' || *Pos == '\'') {
175       char Quote = *Pos;
176       Pos++;
177       const char *End = strchr(Pos, Quote);
178       if (!End)
179         return false;
180       ModuleName = std::string(Pos, End - Pos);
181       Pos = End + 1;
182     } else {
183       int NameLength = strcspn(Pos, kDelimiters);
184       ModuleName = std::string(Pos, NameLength);
185       Pos += NameLength;
186     }
187     if (HasBuildIDPrefix) {
188       BuildID = parseBuildID(ModuleName);
189       if (BuildID.empty())
190         return false;
191       ModuleName.clear();
192     }
193   } else {
194     Pos = InputString.data();
195     ModuleName = BinaryName.str();
196   }
197   // Skip delimiters and parse module offset.
198   Pos += strspn(Pos, kDelimiters);
199   int OffsetLength = strcspn(Pos, kDelimiters);
200   StringRef Offset(Pos, OffsetLength);
201   // GNU addr2line assumes the offset is hexadecimal and allows a redundant
202   // "0x" or "0X" prefix; do the same for compatibility.
203   if (IsAddr2Line)
204     Offset.consume_front("0x") || Offset.consume_front("0X");
205   return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
206 }
207 
208 template <typename T>
209 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
210                     uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
211                     OutputStyle Style, LLVMSymbolizer &Symbolizer,
212                     DIPrinter &Printer) {
213   uint64_t AdjustedOffset = Offset - AdjustVMA;
214   object::SectionedAddress Address = {AdjustedOffset,
215                                       object::SectionedAddress::UndefSection};
216   if (Cmd == Command::Data) {
217     Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
218     print({ModuleName, Offset}, ResOrErr, Printer);
219   } else if (Cmd == Command::Frame) {
220     Expected<std::vector<DILocal>> ResOrErr =
221         Symbolizer.symbolizeFrame(ModuleSpec, Address);
222     print({ModuleName, Offset}, ResOrErr, Printer);
223   } else if (ShouldInline) {
224     Expected<DIInliningInfo> ResOrErr =
225         Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
226     print({ModuleName, Offset}, ResOrErr, Printer);
227   } else if (Style == OutputStyle::GNU) {
228     // With PrintFunctions == FunctionNameKind::LinkageName (default)
229     // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
230     // may override the name of an inlined function with the name of the topmost
231     // caller function in the inlining chain. This contradicts the existing
232     // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
233     // the topmost function, which suits our needs better.
234     Expected<DIInliningInfo> ResOrErr =
235         Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
236     Expected<DILineInfo> Res0OrErr =
237         !ResOrErr
238             ? Expected<DILineInfo>(ResOrErr.takeError())
239             : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
240                                                     : ResOrErr->getFrame(0));
241     print({ModuleName, Offset}, Res0OrErr, Printer);
242   } else {
243     Expected<DILineInfo> ResOrErr =
244         Symbolizer.symbolizeCode(ModuleSpec, Address);
245     print({ModuleName, Offset}, ResOrErr, Printer);
246   }
247   Symbolizer.pruneCache();
248 }
249 
250 static void symbolizeInput(const opt::InputArgList &Args,
251                            ArrayRef<uint8_t> IncomingBuildID,
252                            uint64_t AdjustVMA, bool IsAddr2Line,
253                            OutputStyle Style, StringRef InputString,
254                            LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
255   Command Cmd;
256   std::string ModuleName;
257   SmallVector<uint8_t> BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
258   uint64_t Offset = 0;
259   if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
260                     StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) {
261     Printer.printInvalidCommand({ModuleName, None}, InputString);
262     return;
263   }
264   bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
265   if (!BuildID.empty()) {
266     assert(ModuleName.empty());
267     if (!Args.hasArg(OPT_no_debuginfod))
268       enableDebuginfod(Symbolizer);
269     std::string BuildIDStr = toHex(BuildID);
270     executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
271                    Style, Symbolizer, Printer);
272   } else {
273     executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
274                    Style, Symbolizer, Printer);
275   }
276 }
277 
278 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
279                       raw_ostream &OS) {
280   const char HelpText[] = " [options] addresses...";
281   Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),
282                 ToolName.str().c_str());
283   // TODO Replace this with OptTable API once it adds extrahelp support.
284   OS << "\nPass @FILE as argument to read options from FILE.\n";
285 }
286 
287 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
288                                       StringSaver &Saver,
289                                       SymbolizerOptTable &Tbl) {
290   StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
291   // The environment variable specifies initial options which can be overridden
292   // by commnad line options.
293   Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
294                                                    : "LLVM_SYMBOLIZER_OPTS");
295   bool HasError = false;
296   opt::InputArgList Args =
297       Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
298         errs() << ("error: " + Msg + "\n");
299         HasError = true;
300       });
301   if (HasError)
302     exit(1);
303   if (Args.hasArg(OPT_help)) {
304     printHelp(ToolName, Tbl, outs());
305     exit(0);
306   }
307   if (Args.hasArg(OPT_version)) {
308     outs() << ToolName << '\n';
309     cl::PrintVersionMessage();
310     exit(0);
311   }
312 
313   return Args;
314 }
315 
316 template <typename T>
317 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
318   if (const opt::Arg *A = Args.getLastArg(ID)) {
319     StringRef V(A->getValue());
320     if (!llvm::to_integer(V, Value, 0)) {
321       errs() << A->getSpelling() +
322                     ": expected a non-negative integer, but got '" + V + "'";
323       exit(1);
324     }
325   } else {
326     Value = 0;
327   }
328 }
329 
330 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
331                                                   bool IsAddr2Line) {
332   if (Args.hasArg(OPT_functions))
333     return FunctionNameKind::LinkageName;
334   if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
335     return StringSwitch<FunctionNameKind>(A->getValue())
336         .Case("none", FunctionNameKind::None)
337         .Case("short", FunctionNameKind::ShortName)
338         .Default(FunctionNameKind::LinkageName);
339   return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
340 }
341 
342 static Optional<bool> parseColorArg(const opt::InputArgList &Args) {
343   if (Args.hasArg(OPT_color))
344     return true;
345   if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ))
346     return StringSwitch<Optional<bool>>(A->getValue())
347         .Case("always", true)
348         .Case("never", false)
349         .Case("auto", None);
350   return None;
351 }
352 
353 static SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args,
354                                             int ID) {
355   const opt::Arg *A = Args.getLastArg(ID);
356   if (!A)
357     return {};
358 
359   StringRef V(A->getValue());
360   SmallVector<uint8_t> BuildID = parseBuildID(V);
361   if (BuildID.empty()) {
362     errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";
363     exit(1);
364   }
365   return BuildID;
366 }
367 
368 // Symbolize markup from stdin and write the result to stdout.
369 static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) {
370   MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args));
371   std::string InputString;
372   while (std::getline(std::cin, InputString)) {
373     InputString += '\n';
374     Filter.filter(InputString);
375   }
376   Filter.finish();
377 }
378 
// Used by main() to unwrap the result of getDefaultDebuginfodUrls().
// NOTE(review): as the type name suggests, this presumably reports the error
// and terminates the tool when the wrapped call fails -- confirm against
// llvm/Support/Error.h.
ExitOnError ExitOnErr;
380 
381 int main(int argc, char **argv) {
382   InitLLVM X(argc, argv);
383   sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
384 
385   bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
386   BumpPtrAllocator A;
387   StringSaver Saver(A);
388   SymbolizerOptTable Tbl;
389   opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
390 
391   LLVMSymbolizer::Options Opts;
392   uint64_t AdjustVMA;
393   PrinterConfig Config;
394   parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
395   if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
396     Opts.PathStyle =
397         A->getOption().matches(OPT_basenames)
398             ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
399             : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
400   } else {
401     Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
402   }
403   Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
404   Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
405   Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
406   Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
407   Opts.FallbackDebugPath =
408       Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
409   Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
410   parseIntArg(Args, OPT_print_source_context_lines_EQ,
411               Config.SourceContextLines);
412   Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
413   Opts.UntagAddresses =
414       Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
415   Opts.UseDIA = Args.hasArg(OPT_use_dia);
416 #if !defined(LLVM_ENABLE_DIA_SDK)
417   if (Opts.UseDIA) {
418     WithColor::warning() << "DIA not available; using native PDB reader\n";
419     Opts.UseDIA = false;
420   }
421 #endif
422   Opts.UseSymbolTable = true;
423   if (Args.hasArg(OPT_cache_size_EQ))
424     parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
425   Config.PrintAddress = Args.hasArg(OPT_addresses);
426   Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
427   Config.Pretty = Args.hasArg(OPT_pretty_print);
428   Config.Verbose = Args.hasArg(OPT_verbose);
429 
430   for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
431     StringRef Hint(A->getValue());
432     if (sys::path::extension(Hint) == ".dSYM") {
433       Opts.DsymHints.emplace_back(Hint);
434     } else {
435       errs() << "Warning: invalid dSYM hint: \"" << Hint
436              << "\" (must have the '.dSYM' extension).\n";
437     }
438   }
439 
440   LLVMSymbolizer Symbolizer(Opts);
441 
442   // A debuginfod lookup could succeed if a HTTP client is available and at
443   // least one backing URL is configured.
444   bool ShouldUseDebuginfodByDefault =
445       HTTPClient::isAvailable() &&
446       !ExitOnErr(getDefaultDebuginfodUrls()).empty();
447   if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod,
448                    ShouldUseDebuginfodByDefault))
449     enableDebuginfod(Symbolizer);
450 
451   if (Args.hasArg(OPT_filter_markup)) {
452     filterMarkup(Args, Symbolizer);
453     return 0;
454   }
455 
456   auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
457   if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
458     if (strcmp(A->getValue(), "GNU") == 0)
459       Style = OutputStyle::GNU;
460     else if (strcmp(A->getValue(), "JSON") == 0)
461       Style = OutputStyle::JSON;
462     else
463       Style = OutputStyle::LLVM;
464   }
465 
466   if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {
467     errs() << "error: cannot specify both --build-id and --obj\n";
468     return EXIT_FAILURE;
469   }
470   SmallVector<uint8_t> BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);
471 
472   std::unique_ptr<DIPrinter> Printer;
473   if (Style == OutputStyle::GNU)
474     Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
475   else if (Style == OutputStyle::JSON)
476     Printer = std::make_unique<JSONPrinter>(outs(), Config);
477   else
478     Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config);
479 
480   std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
481   if (InputAddresses.empty()) {
482     const int kMaxInputStringLength = 1024;
483     char InputString[kMaxInputStringLength];
484 
485     while (fgets(InputString, sizeof(InputString), stdin)) {
486       // Strip newline characters.
487       std::string StrippedInputString(InputString);
488       llvm::erase_if(StrippedInputString,
489                      [](char c) { return c == '\r' || c == '\n'; });
490       symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,
491                      StrippedInputString, Symbolizer, *Printer);
492       outs().flush();
493     }
494   } else {
495     Printer->listBegin();
496     for (StringRef Address : InputAddresses)
497       symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,
498                      Symbolizer, *Printer);
499     Printer->listEnd();
500   }
501 
502   return 0;
503 }
504