1 //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LookupResult.h"
#include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <inttypes.h>
#include <iostream>
#include <map>
#include <string>
#include <system_error>
#include <thread>
#include <tuple>
#include <vector>
47 using namespace llvm;
48 using namespace gsym;
49 using namespace object;
50 
/// Command line options.
/// @{
54 
55 namespace {
56 using namespace cl;
57 
58 OptionCategory GeneralOptions("Options");
59 OptionCategory ConversionOptions("Conversion Options");
60 OptionCategory LookupOptions("Lookup Options");
61 
62 static opt<bool> Help("h", desc("Alias for -help"), Hidden,
63                       cat(GeneralOptions));
64 
65 static opt<bool> Verbose("verbose",
66                          desc("Enable verbose logging and encoding details."),
67                          cat(GeneralOptions));
68 
69 static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
70                                         ZeroOrMore, cat(GeneralOptions));
71 
72 static opt<std::string>
73     ConvertFilename("convert", cl::init(""),
74                     cl::desc("Convert the specified file to the GSYM format.\n"
75                              "Supported files include ELF and mach-o files "
76                              "that will have their debug info (DWARF) and "
77                              "symbol table converted."),
78                     cl::value_desc("path"), cat(ConversionOptions));
79 
80 static list<std::string>
81     ArchFilters("arch",
82                 desc("Process debug information for the specified CPU "
83                      "architecture only.\nArchitectures may be specified by "
84                      "name or by number.\nThis option can be specified "
85                      "multiple times, once for each desired architecture."),
86                 cl::value_desc("arch"), cat(ConversionOptions));
87 
88 static opt<std::string>
89     OutputFilename("out-file", cl::init(""),
90                    cl::desc("Specify the path where the converted GSYM file "
91                             "will be saved.\nWhen not specified, a '.gsym' "
92                             "extension will be appended to the file name "
93                             "specified in the --convert option."),
94                    cl::value_desc("path"), cat(ConversionOptions));
95 static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
96                                  aliasopt(OutputFilename),
97                                  cat(ConversionOptions));
98 
99 static opt<bool> Verify("verify",
100                         desc("Verify the generated GSYM file against the "
101                              "information in the file that was converted."),
102                         cat(ConversionOptions));
103 
104 static opt<unsigned>
105     NumThreads("num-threads",
106                desc("Specify the maximum number (n) of simultaneous threads "
107                     "to use when converting files to GSYM.\nDefaults to the "
108                     "number of cores on the current machine."),
109                cl::value_desc("n"), cat(ConversionOptions));
110 
111 static list<uint64_t> LookupAddresses("address",
112                                       desc("Lookup an address in a GSYM file"),
113                                       cl::value_desc("addr"),
114                                       cat(LookupOptions));
115 
116 static opt<bool> LookupAddressesFromStdin(
117     "addresses-from-stdin",
118     desc("Lookup addresses in a GSYM file that are read from stdin\nEach input "
119          "line is expected to be of the following format: <addr> <gsym-path>"),
120     cat(LookupOptions));
121 
122 } // namespace
123 /// @}
124 //===----------------------------------------------------------------------===//
125 
error(StringRef Prefix,llvm::Error Err)126 static void error(StringRef Prefix, llvm::Error Err) {
127   if (!Err)
128     return;
129   errs() << Prefix << ": " << Err << "\n";
130   consumeError(std::move(Err));
131   exit(1);
132 }
133 
error(StringRef Prefix,std::error_code EC)134 static void error(StringRef Prefix, std::error_code EC) {
135   if (!EC)
136     return;
137   errs() << Prefix << ": " << EC.message() << "\n";
138   exit(1);
139 }
140 
141 /// If the input path is a .dSYM bundle (as created by the dsymutil tool),
142 /// replace it with individual entries for each of the object files inside the
143 /// bundle otherwise return the input path.
expandBundle(const std::string & InputPath)144 static std::vector<std::string> expandBundle(const std::string &InputPath) {
145   std::vector<std::string> BundlePaths;
146   SmallString<256> BundlePath(InputPath);
147   // Manually open up the bundle to avoid introducing additional dependencies.
148   if (sys::fs::is_directory(BundlePath) &&
149       sys::path::extension(BundlePath) == ".dSYM") {
150     std::error_code EC;
151     sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
152     for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd;
153          Dir != DirEnd && !EC; Dir.increment(EC)) {
154       const std::string &Path = Dir->path();
155       sys::fs::file_status Status;
156       EC = sys::fs::status(Path, Status);
157       error(Path, EC);
158       switch (Status.type()) {
159       case sys::fs::file_type::regular_file:
160       case sys::fs::file_type::symlink_file:
161       case sys::fs::file_type::type_unknown:
162         BundlePaths.push_back(Path);
163         break;
164       default: /*ignore*/;
165       }
166     }
167     error(BundlePath, EC);
168   }
169   if (!BundlePaths.size())
170     BundlePaths.push_back(InputPath);
171   return BundlePaths;
172 }
173 
getCPUType(MachOObjectFile & MachO)174 static uint32_t getCPUType(MachOObjectFile &MachO) {
175   if (MachO.is64Bit())
176     return MachO.getHeader64().cputype;
177   else
178     return MachO.getHeader().cputype;
179 }
180 
181 /// Return true if the object file has not been filtered by an --arch option.
filterArch(MachOObjectFile & Obj)182 static bool filterArch(MachOObjectFile &Obj) {
183   if (ArchFilters.empty())
184     return true;
185 
186   Triple ObjTriple(Obj.getArchTriple());
187   StringRef ObjArch = ObjTriple.getArchName();
188 
189   for (auto Arch : ArchFilters) {
190     // Match name.
191     if (Arch == ObjArch)
192       return true;
193 
194     // Match architecture number.
195     unsigned Value;
196     if (!StringRef(Arch).getAsInteger(0, Value))
197       if (Value == getCPUType(Obj))
198         return true;
199   }
200   return false;
201 }
202 
203 /// Determine the virtual address that is considered the base address of an ELF
204 /// object file.
205 ///
206 /// The base address of an ELF file is the the "p_vaddr" of the first program
207 /// header whose "p_type" is PT_LOAD.
208 ///
209 /// \param ELFFile An ELF object file we will search.
210 ///
211 /// \returns A valid image base address if we are able to extract one.
212 template <class ELFT>
213 static llvm::Optional<uint64_t>
getImageBaseAddress(const object::ELFFile<ELFT> & ELFFile)214 getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
215   auto PhdrRangeOrErr = ELFFile.program_headers();
216   if (!PhdrRangeOrErr) {
217     consumeError(PhdrRangeOrErr.takeError());
218     return llvm::None;
219   }
220   for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
221     if (Phdr.p_type == ELF::PT_LOAD)
222       return (uint64_t)Phdr.p_vaddr;
223   return llvm::None;
224 }
225 
226 /// Determine the virtual address that is considered the base address of mach-o
227 /// object file.
228 ///
229 /// The base address of a mach-o file is the vmaddr of the  "__TEXT" segment.
230 ///
231 /// \param MachO A mach-o object file we will search.
232 ///
233 /// \returns A valid image base address if we are able to extract one.
234 static llvm::Optional<uint64_t>
getImageBaseAddress(const object::MachOObjectFile * MachO)235 getImageBaseAddress(const object::MachOObjectFile *MachO) {
236   for (const auto &Command : MachO->load_commands()) {
237     if (Command.C.cmd == MachO::LC_SEGMENT) {
238       MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
239       StringRef SegName = SLC.segname;
240       if (SegName == "__TEXT")
241         return SLC.vmaddr;
242     } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
243       MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
244       StringRef SegName = SLC.segname;
245       if (SegName == "__TEXT")
246         return SLC.vmaddr;
247     }
248   }
249   return llvm::None;
250 }
251 
252 /// Determine the virtual address that is considered the base address of an
253 /// object file.
254 ///
255 /// Since GSYM files are used for symbolication, many clients will need to
256 /// easily adjust addresses they find in stack traces so the lookups happen
257 /// on unslid addresses from the original object file. If the base address of
258 /// a GSYM file is set to the base address of the image, then this address
259 /// adjusting is much easier.
260 ///
261 /// \param Obj An object file we will search.
262 ///
263 /// \returns A valid image base address if we are able to extract one.
getImageBaseAddress(object::ObjectFile & Obj)264 static llvm::Optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
265   if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
266     return getImageBaseAddress(MachO);
267   else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
268     return getImageBaseAddress(ELFObj->getELFFile());
269   else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
270     return getImageBaseAddress(ELFObj->getELFFile());
271   else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
272     return getImageBaseAddress(ELFObj->getELFFile());
273   else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
274     return getImageBaseAddress(ELFObj->getELFFile());
275   return llvm::None;
276 }
277 
handleObjectFile(ObjectFile & Obj,const std::string & OutFile)278 static llvm::Error handleObjectFile(ObjectFile &Obj,
279                                     const std::string &OutFile) {
280   auto ThreadCount =
281       NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
282   auto &OS = outs();
283 
284   GsymCreator Gsym;
285 
286   // See if we can figure out the base address for a given object file, and if
287   // we can, then set the base address to use to this value. This will ease
288   // symbolication since clients can slide the GSYM lookup addresses by using
289   // the load bias of the shared library.
290   if (auto ImageBaseAddr = getImageBaseAddress(Obj))
291     Gsym.setBaseAddress(*ImageBaseAddr);
292 
293   // We need to know where the valid sections are that contain instructions.
294   // See header documentation for DWARFTransformer::SetValidTextRanges() for
295   // defails.
296   AddressRanges TextRanges;
297   for (const object::SectionRef &Sect : Obj.sections()) {
298     if (!Sect.isText())
299       continue;
300     const uint64_t Size = Sect.getSize();
301     if (Size == 0)
302       continue;
303     const uint64_t StartAddr = Sect.getAddress();
304     TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
305   }
306 
307   // Make sure there is DWARF to convert first.
308   std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
309   if (!DICtx)
310     return createStringError(std::errc::invalid_argument,
311                              "unable to create DWARF context");
312   logAllUnhandledErrors(DICtx->loadRegisterInfo(Obj), OS, "DwarfTransformer: ");
313 
314   // Make a DWARF transformer object and populate the ranges of the code
315   // so we don't end up adding invalid functions to GSYM data.
316   DwarfTransformer DT(*DICtx, OS, Gsym);
317   if (!TextRanges.empty())
318     Gsym.SetValidTextRanges(TextRanges);
319 
320   // Convert all DWARF to GSYM.
321   if (auto Err = DT.convert(ThreadCount))
322     return Err;
323 
324   // Get the UUID and convert symbol table to GSYM.
325   if (auto Err = ObjectFileTransformer::convert(Obj, OS, Gsym))
326     return Err;
327 
328   // Finalize the GSYM to make it ready to save to disk. This will remove
329   // duplicate FunctionInfo entries where we might have found an entry from
330   // debug info and also a symbol table entry from the object file.
331   if (auto Err = Gsym.finalize(OS))
332     return Err;
333 
334   // Save the GSYM file to disk.
335   support::endianness Endian =
336       Obj.makeTriple().isLittleEndian() ? support::little : support::big;
337   if (auto Err = Gsym.save(OutFile.c_str(), Endian))
338     return Err;
339 
340   // Verify the DWARF if requested. This will ensure all the info in the DWARF
341   // can be looked up in the GSYM and that all lookups get matching data.
342   if (Verify) {
343     if (auto Err = DT.verify(OutFile))
344       return Err;
345   }
346 
347   return Error::success();
348 }
349 
handleBuffer(StringRef Filename,MemoryBufferRef Buffer,const std::string & OutFile)350 static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
351                                 const std::string &OutFile) {
352   Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
353   error(Filename, errorToErrorCode(BinOrErr.takeError()));
354 
355   if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
356     Triple ObjTriple(Obj->makeTriple());
357     auto ArchName = ObjTriple.getArchName();
358     outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
359     if (auto Err = handleObjectFile(*Obj, OutFile.c_str()))
360       return Err;
361   } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
362     // Iterate over all contained architectures and filter out any that were
363     // not specified with the "--arch <arch>" option. If the --arch option was
364     // not specified on the command line, we will process all architectures.
365     std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
366     for (auto &ObjForArch : Fat->objects()) {
367       if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
368         auto &Obj = **MachOOrErr;
369         if (filterArch(Obj))
370           FilterObjs.emplace_back(MachOOrErr->release());
371       } else {
372         error(Filename, MachOOrErr.takeError());
373       }
374     }
375     if (FilterObjs.empty())
376       error(Filename, createStringError(std::errc::invalid_argument,
377                                         "no matching architectures found"));
378 
379     // Now handle each architecture we need to convert.
380     for (auto &Obj : FilterObjs) {
381       Triple ObjTriple(Obj->getArchTriple());
382       auto ArchName = ObjTriple.getArchName();
383       std::string ArchOutFile(OutFile);
384       // If we are only handling a single architecture, then we will use the
385       // normal output file. If we are handling multiple architectures append
386       // the architecture name to the end of the out file path so that we
387       // don't overwrite the previous architecture's gsym file.
388       if (FilterObjs.size() > 1) {
389         ArchOutFile.append(1, '.');
390         ArchOutFile.append(ArchName.str());
391       }
392       outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
393       if (auto Err = handleObjectFile(*Obj, ArchOutFile))
394         return Err;
395     }
396   }
397   return Error::success();
398 }
399 
handleFileConversionToGSYM(StringRef Filename,const std::string & OutFile)400 static llvm::Error handleFileConversionToGSYM(StringRef Filename,
401                                               const std::string &OutFile) {
402   ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
403       MemoryBuffer::getFileOrSTDIN(Filename);
404   error(Filename, BuffOrErr.getError());
405   std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
406   return handleBuffer(Filename, *Buffer, OutFile);
407 }
408 
convertFileToGSYM(raw_ostream & OS)409 static llvm::Error convertFileToGSYM(raw_ostream &OS) {
410   // Expand any .dSYM bundles to the individual object files contained therein.
411   std::vector<std::string> Objects;
412   std::string OutFile = OutputFilename;
413   if (OutFile.empty()) {
414     OutFile = ConvertFilename;
415     OutFile += ".gsym";
416   }
417 
418   OS << "Input file: " << ConvertFilename << "\n";
419 
420   auto Objs = expandBundle(ConvertFilename);
421   llvm::append_range(Objects, Objs);
422 
423   for (auto Object : Objects) {
424     if (auto Err = handleFileConversionToGSYM(Object, OutFile))
425       return Err;
426   }
427   return Error::success();
428 }
429 
doLookup(GsymReader & Gsym,uint64_t Addr,raw_ostream & OS)430 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
431   if (auto Result = Gsym.lookup(Addr)) {
432     // If verbose is enabled dump the full function info for the address.
433     if (Verbose) {
434       if (auto FI = Gsym.getFunctionInfo(Addr)) {
435         OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
436         Gsym.dump(OS, *FI);
437         OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
438       }
439     }
440     OS << Result.get();
441   } else {
442     if (Verbose)
443       OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
444     OS << HEX64(Addr) << ": ";
445     logAllUnhandledErrors(Result.takeError(), OS, "error: ");
446   }
447   if (Verbose)
448     OS << "\n";
449 }
450 
main(int argc,char const * argv[])451 int main(int argc, char const *argv[]) {
452   // Print a stack trace if we signal out.
453   sys::PrintStackTraceOnErrorSignal(argv[0]);
454   PrettyStackTraceProgram X(argc, argv);
455   llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
456 
457   llvm::InitializeAllTargets();
458 
459   const char *Overview =
460       "A tool for dumping, searching and creating GSYM files.\n\n"
461       "Specify one or more GSYM paths as arguments to dump all of the "
462       "information in each GSYM file.\n"
463       "Specify a single GSYM file along with one or more --lookup options to "
464       "lookup addresses within that GSYM file.\n"
465       "Use the --convert option to specify a file with option --out-file "
466       "option to convert to GSYM format.\n";
467   HideUnrelatedOptions({&GeneralOptions, &ConversionOptions, &LookupOptions});
468   cl::ParseCommandLineOptions(argc, argv, Overview);
469 
470   if (Help) {
471     PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
472     return 0;
473   }
474 
475   raw_ostream &OS = outs();
476 
477   if (!ConvertFilename.empty()) {
478     // Convert DWARF to GSYM
479     if (!InputFilenames.empty()) {
480       OS << "error: no input files can be specified when using the --convert "
481             "option.\n";
482       return 1;
483     }
484     // Call error() if we have an error and it will exit with a status of 1
485     if (auto Err = convertFileToGSYM(OS))
486       error("DWARF conversion failed: ", std::move(Err));
487     return 0;
488   }
489 
490   if (LookupAddressesFromStdin) {
491     if (!LookupAddresses.empty() || !InputFilenames.empty()) {
492       OS << "error: no input files or addresses can be specified when using "
493             "the --addresses-from-stdin "
494             "option.\n";
495       return 1;
496     }
497 
498     std::string InputLine;
499     std::string CurrentGSYMPath;
500     llvm::Optional<Expected<GsymReader>> CurrentGsym;
501 
502     while (std::getline(std::cin, InputLine)) {
503       // Strip newline characters.
504       std::string StrippedInputLine(InputLine);
505       llvm::erase_if(StrippedInputLine,
506                      [](char c) { return c == '\r' || c == '\n'; });
507 
508       StringRef AddrStr, GSYMPath;
509       std::tie(AddrStr, GSYMPath) =
510           llvm::StringRef{StrippedInputLine}.split(' ');
511 
512       if (GSYMPath != CurrentGSYMPath) {
513         CurrentGsym = GsymReader::openFile(GSYMPath);
514         if (!*CurrentGsym)
515           error(GSYMPath, CurrentGsym->takeError());
516       }
517 
518       uint64_t Addr;
519       if (AddrStr.getAsInteger(0, Addr)) {
520         OS << "error: invalid address " << AddrStr
521            << ", expected: Address GsymFile.\n";
522         return 1;
523       }
524 
525       doLookup(**CurrentGsym, Addr, OS);
526 
527       OS << "\n";
528       OS.flush();
529     }
530 
531     return EXIT_SUCCESS;
532   }
533 
534   // Dump or access data inside GSYM files
535   for (const auto &GSYMPath : InputFilenames) {
536     auto Gsym = GsymReader::openFile(GSYMPath);
537     if (!Gsym)
538       error(GSYMPath, Gsym.takeError());
539 
540     if (LookupAddresses.empty()) {
541       Gsym->dump(outs());
542       continue;
543     }
544 
545     // Lookup an address in a GSYM file and print any matches.
546     OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
547     for (auto Addr : LookupAddresses) {
548       doLookup(*Gsym, Addr, OS);
549     }
550   }
551   return EXIT_SUCCESS;
552 }
553