1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This utility works much like "addr2line". It is able to transform
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Opts.inc"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
22 #include "llvm/DebugInfo/Symbolize/Markup.h"
23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
25 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
26 #include "llvm/Debuginfod/BuildIDFetcher.h"
27 #include "llvm/Debuginfod/Debuginfod.h"
28 #include "llvm/Debuginfod/HTTPClient.h"
29 #include "llvm/Option/Arg.h"
30 #include "llvm/Option/ArgList.h"
31 #include "llvm/Option/Option.h"
32 #include "llvm/Support/COM.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/InitLLVM.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/StringSaver.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <algorithm>
41 #include <cstdio>
42 #include <cstring>
43 #include <iostream>
44 #include <string>
45 
46 using namespace llvm;
47 using namespace symbolize;
48 
namespace {
// Option identifiers. One OPT_<ID> enumerator is generated for every OPTION
// entry found in Opts.inc; OPT_INVALID occupies the value 0.
enum ID {
  OPT_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  OPT_##ID,
#include "Opts.inc"
#undef OPTION
};

// For every PREFIX entry in Opts.inc, define a StringLiteral array named
// NAME_init and an ArrayRef called NAME that views all of its elements except
// the last one (note the "- 1").
// NOTE(review): presumably the last element is a terminator emitted into
// Opts.inc -- confirm against the generated file.
#define PREFIX(NAME, VALUE)                                                    \
  static constexpr StringLiteral NAME##_init[] = VALUE;                        \
  static constexpr ArrayRef<StringLiteral> NAME(NAME##_init,                   \
                                                std::size(NAME##_init) - 1);
#include "Opts.inc"
#undef PREFIX

// Table with one opt::OptTable::Info record per OPTION entry in Opts.inc. The
// fields of each record are filled in the order given by the macro below.
static constexpr opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  {                                                                            \
      PREFIX,      NAME,      HELPTEXT,                                        \
      METAVAR,     OPT_##ID,  opt::Option::KIND##Class,                        \
      PARAM,       FLAGS,     OPT_##GROUP,                                     \
      OPT_##ALIAS, ALIASARGS, VALUES},
#include "Opts.inc"
#undef OPTION
};

// Option table of this tool: a GenericOptTable built over InfoTable with
// grouped short options switched on.
class SymbolizerOptTable : public opt::GenericOptTable {
public:
  SymbolizerOptTable() : GenericOptTable(InfoTable) {
    setGroupedShortOptions(true);
  }
};
} // namespace
85 
86 template <typename T>
87 static void print(const Request &Request, Expected<T> &ResOrErr,
88                   DIPrinter &Printer) {
89   if (ResOrErr) {
90     // No error, print the result.
91     Printer.print(Request, *ResOrErr);
92     return;
93   }
94 
95   // Handle the error.
96   bool PrintEmpty = true;
97   handleAllErrors(std::move(ResOrErr.takeError()),
98                   [&](const ErrorInfoBase &EI) {
99                     PrintEmpty = Printer.printError(
100                         Request, EI, "LLVMSymbolizer: error reading file: ");
101                   });
102 
103   if (PrintEmpty)
104     Printer.print(Request, T());
105 }
106 
/// Output format used to pick the printer and, for GNU, the symbolization
/// path taken by executeCommand().
enum class OutputStyle {
  LLVM,
  GNU,
  JSON,
};
108 
/// Kind of query carried by one input line: "CODE " (also the default when no
/// keyword is given), "DATA " or "FRAME ".
enum class Command { Code, Data, Frame };
114 
115 static void enableDebuginfod(LLVMSymbolizer &Symbolizer,
116                              const opt::ArgList &Args) {
117   static bool IsEnabled = false;
118   if (IsEnabled)
119     return;
120   IsEnabled = true;
121   // Look up symbols using the debuginfod client.
122   Symbolizer.setBuildIDFetcher(std::make_unique<DebuginfodFetcher>(
123       Args.getAllArgValues(OPT_debug_file_directory_EQ)));
124   // The HTTPClient must be initialized for use by the debuginfod client.
125   HTTPClient::initialize();
126 }
127 
128 static object::BuildID parseBuildID(StringRef Str) {
129   std::string Bytes;
130   if (!tryGetFromHex(Str, Bytes))
131     return {};
132   ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
133                             Bytes.size());
134   return object::BuildID(BuildID.begin(), BuildID.end());
135 }
136 
137 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
138                          StringRef InputString, Command &Cmd,
139                          std::string &ModuleName, object::BuildID &BuildID,
140                          uint64_t &ModuleOffset) {
141   const char kDelimiters[] = " \n\r";
142   ModuleName = "";
143   if (InputString.consume_front("CODE ")) {
144     Cmd = Command::Code;
145   } else if (InputString.consume_front("DATA ")) {
146     Cmd = Command::Data;
147   } else if (InputString.consume_front("FRAME ")) {
148     Cmd = Command::Frame;
149   } else {
150     // If no cmd, assume it's CODE.
151     Cmd = Command::Code;
152   }
153 
154   const char *Pos;
155   // Skip delimiters and parse input filename (if needed).
156   if (BinaryName.empty() && BuildID.empty()) {
157     bool HasFilePrefix = false;
158     bool HasBuildIDPrefix = false;
159     while (true) {
160       if (InputString.consume_front("FILE:")) {
161         if (HasFilePrefix)
162           return false;
163         HasFilePrefix = true;
164         continue;
165       }
166       if (InputString.consume_front("BUILDID:")) {
167         if (HasBuildIDPrefix)
168           return false;
169         HasBuildIDPrefix = true;
170         continue;
171       }
172       break;
173     }
174     if (HasFilePrefix && HasBuildIDPrefix)
175       return false;
176 
177     Pos = InputString.data();
178     Pos += strspn(Pos, kDelimiters);
179     if (*Pos == '"' || *Pos == '\'') {
180       char Quote = *Pos;
181       Pos++;
182       const char *End = strchr(Pos, Quote);
183       if (!End)
184         return false;
185       ModuleName = std::string(Pos, End - Pos);
186       Pos = End + 1;
187     } else {
188       int NameLength = strcspn(Pos, kDelimiters);
189       ModuleName = std::string(Pos, NameLength);
190       Pos += NameLength;
191     }
192     if (HasBuildIDPrefix) {
193       BuildID = parseBuildID(ModuleName);
194       if (BuildID.empty())
195         return false;
196       ModuleName.clear();
197     }
198   } else {
199     Pos = InputString.data();
200     ModuleName = BinaryName.str();
201   }
202   // Skip delimiters and parse module offset.
203   Pos += strspn(Pos, kDelimiters);
204   int OffsetLength = strcspn(Pos, kDelimiters);
205   StringRef Offset(Pos, OffsetLength);
206   // GNU addr2line assumes the offset is hexadecimal and allows a redundant
207   // "0x" or "0X" prefix; do the same for compatibility.
208   if (IsAddr2Line)
209     Offset.consume_front("0x") || Offset.consume_front("0X");
210   return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
211 }
212 
213 template <typename T>
214 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
215                     uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
216                     OutputStyle Style, LLVMSymbolizer &Symbolizer,
217                     DIPrinter &Printer) {
218   uint64_t AdjustedOffset = Offset - AdjustVMA;
219   object::SectionedAddress Address = {AdjustedOffset,
220                                       object::SectionedAddress::UndefSection};
221   if (Cmd == Command::Data) {
222     Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
223     print({ModuleName, Offset}, ResOrErr, Printer);
224   } else if (Cmd == Command::Frame) {
225     Expected<std::vector<DILocal>> ResOrErr =
226         Symbolizer.symbolizeFrame(ModuleSpec, Address);
227     print({ModuleName, Offset}, ResOrErr, Printer);
228   } else if (ShouldInline) {
229     Expected<DIInliningInfo> ResOrErr =
230         Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
231     print({ModuleName, Offset}, ResOrErr, Printer);
232   } else if (Style == OutputStyle::GNU) {
233     // With PrintFunctions == FunctionNameKind::LinkageName (default)
234     // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
235     // may override the name of an inlined function with the name of the topmost
236     // caller function in the inlining chain. This contradicts the existing
237     // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
238     // the topmost function, which suits our needs better.
239     Expected<DIInliningInfo> ResOrErr =
240         Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
241     Expected<DILineInfo> Res0OrErr =
242         !ResOrErr
243             ? Expected<DILineInfo>(ResOrErr.takeError())
244             : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
245                                                     : ResOrErr->getFrame(0));
246     print({ModuleName, Offset}, Res0OrErr, Printer);
247   } else {
248     Expected<DILineInfo> ResOrErr =
249         Symbolizer.symbolizeCode(ModuleSpec, Address);
250     print({ModuleName, Offset}, ResOrErr, Printer);
251   }
252   Symbolizer.pruneCache();
253 }
254 
255 static void symbolizeInput(const opt::InputArgList &Args,
256                            object::BuildIDRef IncomingBuildID,
257                            uint64_t AdjustVMA, bool IsAddr2Line,
258                            OutputStyle Style, StringRef InputString,
259                            LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
260   Command Cmd;
261   std::string ModuleName;
262   object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
263   uint64_t Offset = 0;
264   if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
265                     StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) {
266     Printer.printInvalidCommand({ModuleName, std::nullopt}, InputString);
267     return;
268   }
269   bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
270   if (!BuildID.empty()) {
271     assert(ModuleName.empty());
272     if (!Args.hasArg(OPT_no_debuginfod))
273       enableDebuginfod(Symbolizer, Args);
274     std::string BuildIDStr = toHex(BuildID);
275     executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
276                    Style, Symbolizer, Printer);
277   } else {
278     executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
279                    Style, Symbolizer, Printer);
280   }
281 }
282 
283 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
284                       raw_ostream &OS) {
285   const char HelpText[] = " [options] addresses...";
286   Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),
287                 ToolName.str().c_str());
288   // TODO Replace this with OptTable API once it adds extrahelp support.
289   OS << "\nPass @FILE as argument to read options from FILE.\n";
290 }
291 
292 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
293                                       StringSaver &Saver,
294                                       SymbolizerOptTable &Tbl) {
295   StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
296   // The environment variable specifies initial options which can be overridden
297   // by commnad line options.
298   Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
299                                                    : "LLVM_SYMBOLIZER_OPTS");
300   bool HasError = false;
301   opt::InputArgList Args =
302       Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
303         errs() << ("error: " + Msg + "\n");
304         HasError = true;
305       });
306   if (HasError)
307     exit(1);
308   if (Args.hasArg(OPT_help)) {
309     printHelp(ToolName, Tbl, outs());
310     exit(0);
311   }
312   if (Args.hasArg(OPT_version)) {
313     outs() << ToolName << '\n';
314     cl::PrintVersionMessage();
315     exit(0);
316   }
317 
318   return Args;
319 }
320 
321 template <typename T>
322 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
323   if (const opt::Arg *A = Args.getLastArg(ID)) {
324     StringRef V(A->getValue());
325     if (!llvm::to_integer(V, Value, 0)) {
326       errs() << A->getSpelling() +
327                     ": expected a non-negative integer, but got '" + V + "'";
328       exit(1);
329     }
330   } else {
331     Value = 0;
332   }
333 }
334 
335 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
336                                                   bool IsAddr2Line) {
337   if (Args.hasArg(OPT_functions))
338     return FunctionNameKind::LinkageName;
339   if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
340     return StringSwitch<FunctionNameKind>(A->getValue())
341         .Case("none", FunctionNameKind::None)
342         .Case("short", FunctionNameKind::ShortName)
343         .Default(FunctionNameKind::LinkageName);
344   return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
345 }
346 
347 static std::optional<bool> parseColorArg(const opt::InputArgList &Args) {
348   if (Args.hasArg(OPT_color))
349     return true;
350   if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ))
351     return StringSwitch<std::optional<bool>>(A->getValue())
352         .Case("always", true)
353         .Case("never", false)
354         .Case("auto", std::nullopt);
355   return std::nullopt;
356 }
357 
358 static object::BuildID parseBuildIDArg(const opt::InputArgList &Args, int ID) {
359   const opt::Arg *A = Args.getLastArg(ID);
360   if (!A)
361     return {};
362 
363   StringRef V(A->getValue());
364   object::BuildID BuildID = parseBuildID(V);
365   if (BuildID.empty()) {
366     errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";
367     exit(1);
368   }
369   return BuildID;
370 }
371 
372 // Symbolize markup from stdin and write the result to stdout.
373 static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) {
374   MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args));
375   std::string InputString;
376   while (std::getline(std::cin, InputString)) {
377     InputString += '\n';
378     Filter.filter(InputString);
379   }
380   Filter.finish();
381 }
382 
// File-level ExitOnError helper object.
// NOTE(review): nothing in the code visible in this file refers to it --
// confirm whether it is still needed.
ExitOnError ExitOnErr;
384 
385 int main(int argc, char **argv) {
386   InitLLVM X(argc, argv);
387   sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
388 
389   bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
390   BumpPtrAllocator A;
391   StringSaver Saver(A);
392   SymbolizerOptTable Tbl;
393   opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
394 
395   LLVMSymbolizer::Options Opts;
396   uint64_t AdjustVMA;
397   PrinterConfig Config;
398   parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
399   if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
400     Opts.PathStyle =
401         A->getOption().matches(OPT_basenames)
402             ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
403             : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
404   } else {
405     Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
406   }
407   Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
408   Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
409   Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
410   Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
411   Opts.FallbackDebugPath =
412       Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
413   Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
414   parseIntArg(Args, OPT_print_source_context_lines_EQ,
415               Config.SourceContextLines);
416   Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
417   Opts.UntagAddresses =
418       Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
419   Opts.UseDIA = Args.hasArg(OPT_use_dia);
420 #if !defined(LLVM_ENABLE_DIA_SDK)
421   if (Opts.UseDIA) {
422     WithColor::warning() << "DIA not available; using native PDB reader\n";
423     Opts.UseDIA = false;
424   }
425 #endif
426   Opts.UseSymbolTable = true;
427   if (Args.hasArg(OPT_cache_size_EQ))
428     parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
429   Config.PrintAddress = Args.hasArg(OPT_addresses);
430   Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
431   Config.Pretty = Args.hasArg(OPT_pretty_print);
432   Config.Verbose = Args.hasArg(OPT_verbose);
433 
434   for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
435     StringRef Hint(A->getValue());
436     if (sys::path::extension(Hint) == ".dSYM") {
437       Opts.DsymHints.emplace_back(Hint);
438     } else {
439       errs() << "Warning: invalid dSYM hint: \"" << Hint
440              << "\" (must have the '.dSYM' extension).\n";
441     }
442   }
443 
444   LLVMSymbolizer Symbolizer(Opts);
445 
446   if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, canUseDebuginfod()))
447     enableDebuginfod(Symbolizer, Args);
448 
449   if (Args.hasArg(OPT_filter_markup)) {
450     filterMarkup(Args, Symbolizer);
451     return 0;
452   }
453 
454   auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
455   if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
456     if (strcmp(A->getValue(), "GNU") == 0)
457       Style = OutputStyle::GNU;
458     else if (strcmp(A->getValue(), "JSON") == 0)
459       Style = OutputStyle::JSON;
460     else
461       Style = OutputStyle::LLVM;
462   }
463 
464   if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {
465     errs() << "error: cannot specify both --build-id and --obj\n";
466     return EXIT_FAILURE;
467   }
468   object::BuildID BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);
469 
470   std::unique_ptr<DIPrinter> Printer;
471   if (Style == OutputStyle::GNU)
472     Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
473   else if (Style == OutputStyle::JSON)
474     Printer = std::make_unique<JSONPrinter>(outs(), Config);
475   else
476     Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config);
477 
478   std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
479   if (InputAddresses.empty()) {
480     const int kMaxInputStringLength = 1024;
481     char InputString[kMaxInputStringLength];
482 
483     while (fgets(InputString, sizeof(InputString), stdin)) {
484       // Strip newline characters.
485       std::string StrippedInputString(InputString);
486       llvm::erase_if(StrippedInputString,
487                      [](char c) { return c == '\r' || c == '\n'; });
488       symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,
489                      StrippedInputString, Symbolizer, *Printer);
490       outs().flush();
491     }
492   } else {
493     Printer->listBegin();
494     for (StringRef Address : InputAddresses)
495       symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,
496                      Symbolizer, *Printer);
497     Printer->listEnd();
498   }
499 
500   return 0;
501 }
502