1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Opts.inc"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
22 #include "llvm/DebugInfo/Symbolize/Markup.h"
23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
25 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
26 #include "llvm/Debuginfod/BuildIDFetcher.h"
27 #include "llvm/Debuginfod/Debuginfod.h"
28 #include "llvm/Debuginfod/HTTPClient.h"
29 #include "llvm/Option/Arg.h"
30 #include "llvm/Option/ArgList.h"
31 #include "llvm/Option/Option.h"
32 #include "llvm/Support/COM.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/InitLLVM.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/StringSaver.h"
39 #include "llvm/Support/WithColor.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <algorithm>
42 #include <cstdio>
43 #include <cstring>
44 #include <iostream>
45 #include <string>
46 
47 using namespace llvm;
48 using namespace symbolize;
49 
50 namespace {
51 enum ID {
52   OPT_INVALID = 0, // This is not an option ID.
53 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
54                HELPTEXT, METAVAR, VALUES)                                      \
55   OPT_##ID,
56 #include "Opts.inc"
57 #undef OPTION
58 };
59 
60 #define PREFIX(NAME, VALUE)                                                    \
61   static constexpr StringLiteral NAME##_init[] = VALUE;                        \
62   static constexpr ArrayRef<StringLiteral> NAME(NAME##_init,                   \
63                                                 std::size(NAME##_init) - 1);
64 #include "Opts.inc"
65 #undef PREFIX
66 
67 static constexpr opt::OptTable::Info InfoTable[] = {
68 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
69                HELPTEXT, METAVAR, VALUES)                                      \
70   {                                                                            \
71       PREFIX,      NAME,      HELPTEXT,                                        \
72       METAVAR,     OPT_##ID,  opt::Option::KIND##Class,                        \
73       PARAM,       FLAGS,     OPT_##GROUP,                                     \
74       OPT_##ALIAS, ALIASARGS, VALUES},
75 #include "Opts.inc"
76 #undef OPTION
77 };
78 
79 class SymbolizerOptTable : public opt::GenericOptTable {
80 public:
81   SymbolizerOptTable() : GenericOptTable(InfoTable) {
82     setGroupedShortOptions(true);
83   }
84 };
85 } // namespace
86 
87 static std::string ToolName;
88 
89 static void printError(const ErrorInfoBase &EI, StringRef Path) {
90   WithColor::error(errs(), ToolName);
91   if (!EI.isA<FileError>())
92     errs() << "'" << Path << "': ";
93   EI.log(errs());
94   errs() << '\n';
95 }
96 
97 template <typename T>
98 static void print(const Request &Request, Expected<T> &ResOrErr,
99                   DIPrinter &Printer) {
100   if (ResOrErr) {
101     // No error, print the result.
102     Printer.print(Request, *ResOrErr);
103     return;
104   }
105 
106   // Handle the error.
107   bool PrintEmpty = true;
108   handleAllErrors(std::move(ResOrErr.takeError()),
109                   [&](const ErrorInfoBase &EI) {
110                     PrintEmpty = Printer.printError(Request, EI);
111                   });
112 
113   if (PrintEmpty)
114     Printer.print(Request, T());
115 }
116 
/// Output format selected for printing results.
enum class OutputStyle {
  LLVM,
  GNU,
  JSON,
};

/// Kind of symbolization request; chosen by an optional "CODE ", "DATA " or
/// "FRAME " prefix on the input line.
enum class Command { Code, Data, Frame };
124 
125 static void enableDebuginfod(LLVMSymbolizer &Symbolizer,
126                              const opt::ArgList &Args) {
127   static bool IsEnabled = false;
128   if (IsEnabled)
129     return;
130   IsEnabled = true;
131   // Look up symbols using the debuginfod client.
132   Symbolizer.setBuildIDFetcher(std::make_unique<DebuginfodFetcher>(
133       Args.getAllArgValues(OPT_debug_file_directory_EQ)));
134   // The HTTPClient must be initialized for use by the debuginfod client.
135   HTTPClient::initialize();
136 }
137 
138 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
139                          StringRef InputString, Command &Cmd,
140                          std::string &ModuleName, object::BuildID &BuildID,
141                          uint64_t &ModuleOffset) {
142   const char kDelimiters[] = " \n\r";
143   ModuleName = "";
144   if (InputString.consume_front("CODE ")) {
145     Cmd = Command::Code;
146   } else if (InputString.consume_front("DATA ")) {
147     Cmd = Command::Data;
148   } else if (InputString.consume_front("FRAME ")) {
149     Cmd = Command::Frame;
150   } else {
151     // If no cmd, assume it's CODE.
152     Cmd = Command::Code;
153   }
154 
155   const char *Pos;
156   // Skip delimiters and parse input filename (if needed).
157   if (BinaryName.empty() && BuildID.empty()) {
158     bool HasFilePrefix = false;
159     bool HasBuildIDPrefix = false;
160     while (true) {
161       if (InputString.consume_front("FILE:")) {
162         if (HasFilePrefix)
163           return false;
164         HasFilePrefix = true;
165         continue;
166       }
167       if (InputString.consume_front("BUILDID:")) {
168         if (HasBuildIDPrefix)
169           return false;
170         HasBuildIDPrefix = true;
171         continue;
172       }
173       break;
174     }
175     if (HasFilePrefix && HasBuildIDPrefix)
176       return false;
177 
178     Pos = InputString.data();
179     Pos += strspn(Pos, kDelimiters);
180     if (*Pos == '"' || *Pos == '\'') {
181       char Quote = *Pos;
182       Pos++;
183       const char *End = strchr(Pos, Quote);
184       if (!End)
185         return false;
186       ModuleName = std::string(Pos, End - Pos);
187       Pos = End + 1;
188     } else {
189       int NameLength = strcspn(Pos, kDelimiters);
190       ModuleName = std::string(Pos, NameLength);
191       Pos += NameLength;
192     }
193     if (HasBuildIDPrefix) {
194       BuildID = parseBuildID(ModuleName);
195       if (BuildID.empty())
196         return false;
197       ModuleName.clear();
198     }
199   } else {
200     Pos = InputString.data();
201     ModuleName = BinaryName.str();
202   }
203   // Skip delimiters and parse module offset.
204   Pos += strspn(Pos, kDelimiters);
205   int OffsetLength = strcspn(Pos, kDelimiters);
206   StringRef Offset(Pos, OffsetLength);
207   // GNU addr2line assumes the offset is hexadecimal and allows a redundant
208   // "0x" or "0X" prefix; do the same for compatibility.
209   if (IsAddr2Line)
210     Offset.consume_front("0x") || Offset.consume_front("0X");
211   return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
212 }
213 
214 template <typename T>
215 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
216                     uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
217                     OutputStyle Style, LLVMSymbolizer &Symbolizer,
218                     DIPrinter &Printer) {
219   uint64_t AdjustedOffset = Offset - AdjustVMA;
220   object::SectionedAddress Address = {AdjustedOffset,
221                                       object::SectionedAddress::UndefSection};
222   Request SymRequest = {ModuleName, Offset};
223   if (Cmd == Command::Data) {
224     Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
225     print(SymRequest, ResOrErr, Printer);
226   } else if (Cmd == Command::Frame) {
227     Expected<std::vector<DILocal>> ResOrErr =
228         Symbolizer.symbolizeFrame(ModuleSpec, Address);
229     print(SymRequest, ResOrErr, Printer);
230   } else if (ShouldInline) {
231     Expected<DIInliningInfo> ResOrErr =
232         Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
233     print(SymRequest, ResOrErr, Printer);
234   } else if (Style == OutputStyle::GNU) {
235     // With PrintFunctions == FunctionNameKind::LinkageName (default)
236     // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
237     // may override the name of an inlined function with the name of the topmost
238     // caller function in the inlining chain. This contradicts the existing
239     // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
240     // the topmost function, which suits our needs better.
241     Expected<DIInliningInfo> ResOrErr =
242         Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
243     Expected<DILineInfo> Res0OrErr =
244         !ResOrErr
245             ? Expected<DILineInfo>(ResOrErr.takeError())
246             : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
247                                                     : ResOrErr->getFrame(0));
248     print(SymRequest, Res0OrErr, Printer);
249   } else {
250     Expected<DILineInfo> ResOrErr =
251         Symbolizer.symbolizeCode(ModuleSpec, Address);
252     print(SymRequest, ResOrErr, Printer);
253   }
254   Symbolizer.pruneCache();
255 }
256 
257 static void symbolizeInput(const opt::InputArgList &Args,
258                            object::BuildIDRef IncomingBuildID,
259                            uint64_t AdjustVMA, bool IsAddr2Line,
260                            OutputStyle Style, StringRef InputString,
261                            LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
262   Command Cmd;
263   std::string ModuleName;
264   object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
265   uint64_t Offset = 0;
266   if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
267                     StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) {
268     Printer.printInvalidCommand({ModuleName, std::nullopt}, InputString);
269     return;
270   }
271   bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
272   if (!BuildID.empty()) {
273     assert(ModuleName.empty());
274     if (!Args.hasArg(OPT_no_debuginfod))
275       enableDebuginfod(Symbolizer, Args);
276     std::string BuildIDStr = toHex(BuildID);
277     executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
278                    Style, Symbolizer, Printer);
279   } else {
280     executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
281                    Style, Symbolizer, Printer);
282   }
283 }
284 
285 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
286                       raw_ostream &OS) {
287   const char HelpText[] = " [options] addresses...";
288   Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),
289                 ToolName.str().c_str());
290   // TODO Replace this with OptTable API once it adds extrahelp support.
291   OS << "\nPass @FILE as argument to read options from FILE.\n";
292 }
293 
294 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
295                                       StringSaver &Saver,
296                                       SymbolizerOptTable &Tbl) {
297   StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
298   // The environment variable specifies initial options which can be overridden
299   // by commnad line options.
300   Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
301                                                    : "LLVM_SYMBOLIZER_OPTS");
302   bool HasError = false;
303   opt::InputArgList Args =
304       Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
305         errs() << ("error: " + Msg + "\n");
306         HasError = true;
307       });
308   if (HasError)
309     exit(1);
310   if (Args.hasArg(OPT_help)) {
311     printHelp(ToolName, Tbl, outs());
312     exit(0);
313   }
314   if (Args.hasArg(OPT_version)) {
315     outs() << ToolName << '\n';
316     cl::PrintVersionMessage();
317     exit(0);
318   }
319 
320   return Args;
321 }
322 
323 template <typename T>
324 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
325   if (const opt::Arg *A = Args.getLastArg(ID)) {
326     StringRef V(A->getValue());
327     if (!llvm::to_integer(V, Value, 0)) {
328       errs() << A->getSpelling() +
329                     ": expected a non-negative integer, but got '" + V + "'";
330       exit(1);
331     }
332   } else {
333     Value = 0;
334   }
335 }
336 
337 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
338                                                   bool IsAddr2Line) {
339   if (Args.hasArg(OPT_functions))
340     return FunctionNameKind::LinkageName;
341   if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
342     return StringSwitch<FunctionNameKind>(A->getValue())
343         .Case("none", FunctionNameKind::None)
344         .Case("short", FunctionNameKind::ShortName)
345         .Default(FunctionNameKind::LinkageName);
346   return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
347 }
348 
349 static std::optional<bool> parseColorArg(const opt::InputArgList &Args) {
350   if (Args.hasArg(OPT_color))
351     return true;
352   if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ))
353     return StringSwitch<std::optional<bool>>(A->getValue())
354         .Case("always", true)
355         .Case("never", false)
356         .Case("auto", std::nullopt);
357   return std::nullopt;
358 }
359 
360 static object::BuildID parseBuildIDArg(const opt::InputArgList &Args, int ID) {
361   const opt::Arg *A = Args.getLastArg(ID);
362   if (!A)
363     return {};
364 
365   StringRef V(A->getValue());
366   object::BuildID BuildID = parseBuildID(V);
367   if (BuildID.empty()) {
368     errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";
369     exit(1);
370   }
371   return BuildID;
372 }
373 
374 // Symbolize markup from stdin and write the result to stdout.
375 static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) {
376   MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args));
377   std::string InputString;
378   while (std::getline(std::cin, InputString)) {
379     InputString += '\n';
380     Filter.filter(InputString);
381   }
382   Filter.finish();
383 }
384 
385 ExitOnError ExitOnErr;
386 
387 int main(int argc, char **argv) {
388   InitLLVM X(argc, argv);
389   sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
390 
391   ToolName = argv[0];
392   bool IsAddr2Line = sys::path::stem(ToolName).contains("addr2line");
393   BumpPtrAllocator A;
394   StringSaver Saver(A);
395   SymbolizerOptTable Tbl;
396   opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
397 
398   LLVMSymbolizer::Options Opts;
399   uint64_t AdjustVMA;
400   PrinterConfig Config;
401   parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
402   if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
403     Opts.PathStyle =
404         A->getOption().matches(OPT_basenames)
405             ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
406             : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
407   } else {
408     Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
409   }
410   Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
411   Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
412   Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
413   Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
414   Opts.FallbackDebugPath =
415       Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
416   Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
417   parseIntArg(Args, OPT_print_source_context_lines_EQ,
418               Config.SourceContextLines);
419   Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
420   Opts.UntagAddresses =
421       Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
422   Opts.UseDIA = Args.hasArg(OPT_use_dia);
423 #if !defined(LLVM_ENABLE_DIA_SDK)
424   if (Opts.UseDIA) {
425     WithColor::warning() << "DIA not available; using native PDB reader\n";
426     Opts.UseDIA = false;
427   }
428 #endif
429   Opts.UseSymbolTable = true;
430   if (Args.hasArg(OPT_cache_size_EQ))
431     parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
432   Config.PrintAddress = Args.hasArg(OPT_addresses);
433   Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
434   Config.Pretty = Args.hasArg(OPT_pretty_print);
435   Config.Verbose = Args.hasArg(OPT_verbose);
436 
437   for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
438     StringRef Hint(A->getValue());
439     if (sys::path::extension(Hint) == ".dSYM") {
440       Opts.DsymHints.emplace_back(Hint);
441     } else {
442       errs() << "Warning: invalid dSYM hint: \"" << Hint
443              << "\" (must have the '.dSYM' extension).\n";
444     }
445   }
446 
447   LLVMSymbolizer Symbolizer(Opts);
448 
449   if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, canUseDebuginfod()))
450     enableDebuginfod(Symbolizer, Args);
451 
452   if (Args.hasArg(OPT_filter_markup)) {
453     filterMarkup(Args, Symbolizer);
454     return 0;
455   }
456 
457   auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
458   if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
459     if (strcmp(A->getValue(), "GNU") == 0)
460       Style = OutputStyle::GNU;
461     else if (strcmp(A->getValue(), "JSON") == 0)
462       Style = OutputStyle::JSON;
463     else
464       Style = OutputStyle::LLVM;
465   }
466 
467   if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {
468     errs() << "error: cannot specify both --build-id and --obj\n";
469     return EXIT_FAILURE;
470   }
471   object::BuildID BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);
472 
473   std::unique_ptr<DIPrinter> Printer;
474   if (Style == OutputStyle::GNU)
475     Printer = std::make_unique<GNUPrinter>(outs(), printError, Config);
476   else if (Style == OutputStyle::JSON)
477     Printer = std::make_unique<JSONPrinter>(outs(), Config);
478   else
479     Printer = std::make_unique<LLVMPrinter>(outs(), printError, Config);
480 
481   // When an input file is specified, exit immediately if the file cannot be
482   // read. If getOrCreateModuleInfo succeeds, symbolizeInput will reuse the
483   // cached file handle.
484   if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) {
485     auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue());
486     if (!Status) {
487       Request SymRequest = {Arg->getValue(), 0};
488       handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) {
489         Printer->printError(SymRequest, EI);
490       });
491       return EXIT_FAILURE;
492     }
493   }
494 
495   std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
496   if (InputAddresses.empty()) {
497     const int kMaxInputStringLength = 1024;
498     char InputString[kMaxInputStringLength];
499 
500     while (fgets(InputString, sizeof(InputString), stdin)) {
501       // Strip newline characters.
502       std::string StrippedInputString(InputString);
503       llvm::erase_if(StrippedInputString,
504                      [](char c) { return c == '\r' || c == '\n'; });
505       symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,
506                      StrippedInputString, Symbolizer, *Printer);
507       outs().flush();
508     }
509   } else {
510     Printer->listBegin();
511     for (StringRef Address : InputAddresses)
512       symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,
513                      Symbolizer, *Printer);
514     Printer->listEnd();
515   }
516 
517   return 0;
518 }
519