1 //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/ADT/StringSet.h"
12 #include "llvm/ADT/Triple.h"
13 #include "llvm/DebugInfo/DIContext.h"
14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
15 #include "llvm/Object/Archive.h"
16 #include "llvm/Object/ELFObjectFile.h"
17 #include "llvm/Object/MachOUniversal.h"
18 #include "llvm/Object/ObjectFile.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/ManagedStatic.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/TargetSelect.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cstring>
32 #include <inttypes.h>
33 #include <map>
34 #include <string>
35 #include <system_error>
36 #include <vector>
37 
38 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
39 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
40 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
41 #include "llvm/DebugInfo/GSYM/GsymReader.h"
42 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
43 #include "llvm/DebugInfo/GSYM/LookupResult.h"
44 #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
45 
46 using namespace llvm;
47 using namespace gsym;
48 using namespace object;
49 
/// Command line options.
/// @{
53 
54 namespace {
55 using namespace cl;
56 
57 OptionCategory GeneralOptions("Options");
58 OptionCategory ConversionOptions("Conversion Options");
59 OptionCategory LookupOptions("Lookup Options");
60 
61 static opt<bool> Help("h", desc("Alias for -help"), Hidden,
62                       cat(GeneralOptions));
63 
64 static opt<bool> Verbose("verbose",
65                          desc("Enable verbose logging and encoding details."),
66                          cat(GeneralOptions));
67 
68 static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
69                                         ZeroOrMore, cat(GeneralOptions));
70 
71 static opt<std::string>
72     ConvertFilename("convert", cl::init(""),
73                     cl::desc("Convert the specified file to the GSYM format.\n"
74                              "Supported files include ELF and mach-o files "
75                              "that will have their debug info (DWARF) and "
76                              "symbol table converted."),
77                     cl::value_desc("path"), cat(ConversionOptions));
78 
79 static list<std::string>
80     ArchFilters("arch",
81                 desc("Process debug information for the specified CPU "
82                      "architecture only.\nArchitectures may be specified by "
83                      "name or by number.\nThis option can be specified "
84                      "multiple times, once for each desired architecture."),
85                 cl::value_desc("arch"), cat(ConversionOptions));
86 
87 static opt<std::string>
88     OutputFilename("out-file", cl::init(""),
89                    cl::desc("Specify the path where the converted GSYM file "
90                             "will be saved.\nWhen not specified, a '.gsym' "
91                             "extension will be appended to the file name "
92                             "specified in the --convert option."),
93                    cl::value_desc("path"), cat(ConversionOptions));
94 static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
95                                  aliasopt(OutputFilename),
96                                  cat(ConversionOptions));
97 
98 static opt<bool> Verify("verify",
99                         desc("Verify the generated GSYM file against the "
100                              "information in the file that was converted."),
101                         cat(ConversionOptions));
102 
103 static opt<unsigned>
104     NumThreads("num-threads",
105                desc("Specify the maximum number (n) of simultaneous threads "
106                     "to use when converting files to GSYM.\nDefaults to the "
107                     "number of cores on the current machine."),
108                cl::value_desc("n"), cat(ConversionOptions));
109 
110 static list<uint64_t> LookupAddresses("address",
111                                       desc("Lookup an address in a GSYM file"),
112                                       cl::value_desc("addr"),
113                                       cat(LookupOptions));
114 
115 
116 
117 } // namespace
118 /// @}
119 //===----------------------------------------------------------------------===//
120 
error(StringRef Prefix,llvm::Error Err)121 static void error(StringRef Prefix, llvm::Error Err) {
122   if (!Err)
123     return;
124   errs() << Prefix << ": " << Err << "\n";
125   consumeError(std::move(Err));
126   exit(1);
127 }
128 
error(StringRef Prefix,std::error_code EC)129 static void error(StringRef Prefix, std::error_code EC) {
130   if (!EC)
131     return;
132   errs() << Prefix << ": " << EC.message() << "\n";
133   exit(1);
134 }
135 
136 
137 /// If the input path is a .dSYM bundle (as created by the dsymutil tool),
138 /// replace it with individual entries for each of the object files inside the
139 /// bundle otherwise return the input path.
expandBundle(const std::string & InputPath)140 static std::vector<std::string> expandBundle(const std::string &InputPath) {
141   std::vector<std::string> BundlePaths;
142   SmallString<256> BundlePath(InputPath);
143   // Manually open up the bundle to avoid introducing additional dependencies.
144   if (sys::fs::is_directory(BundlePath) &&
145       sys::path::extension(BundlePath) == ".dSYM") {
146     std::error_code EC;
147     sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
148     for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd;
149          Dir != DirEnd && !EC; Dir.increment(EC)) {
150       const std::string &Path = Dir->path();
151       sys::fs::file_status Status;
152       EC = sys::fs::status(Path, Status);
153       error(Path, EC);
154       switch (Status.type()) {
155       case sys::fs::file_type::regular_file:
156       case sys::fs::file_type::symlink_file:
157       case sys::fs::file_type::type_unknown:
158         BundlePaths.push_back(Path);
159         break;
160       default: /*ignore*/;
161       }
162     }
163     error(BundlePath, EC);
164   }
165   if (!BundlePaths.size())
166     BundlePaths.push_back(InputPath);
167   return BundlePaths;
168 }
169 
getCPUType(MachOObjectFile & MachO)170 static uint32_t getCPUType(MachOObjectFile &MachO) {
171   if (MachO.is64Bit())
172     return MachO.getHeader64().cputype;
173   else
174     return MachO.getHeader().cputype;
175 }
176 
177 /// Return true if the object file has not been filtered by an --arch option.
filterArch(MachOObjectFile & Obj)178 static bool filterArch(MachOObjectFile &Obj) {
179   if (ArchFilters.empty())
180     return true;
181 
182   Triple ObjTriple(Obj.getArchTriple());
183   StringRef ObjArch = ObjTriple.getArchName();
184 
185   for (auto Arch : ArchFilters) {
186     // Match name.
187     if (Arch == ObjArch)
188       return true;
189 
190     // Match architecture number.
191     unsigned Value;
192     if (!StringRef(Arch).getAsInteger(0, Value))
193       if (Value == getCPUType(Obj))
194         return true;
195   }
196   return false;
197 }
198 
199 /// Determine the virtual address that is considered the base address of an ELF
200 /// object file.
201 ///
202 /// The base address of an ELF file is the the "p_vaddr" of the first program
203 /// header whose "p_type" is PT_LOAD.
204 ///
205 /// \param ELFFile An ELF object file we will search.
206 ///
207 /// \returns A valid image base address if we are able to extract one.
208 template <class ELFT>
209 static llvm::Optional<uint64_t>
getImageBaseAddress(const object::ELFFile<ELFT> * ELFFile)210 getImageBaseAddress(const object::ELFFile<ELFT> *ELFFile) {
211   auto PhdrRangeOrErr = ELFFile->program_headers();
212   if (!PhdrRangeOrErr) {
213     consumeError(PhdrRangeOrErr.takeError());
214     return llvm::None;
215   }
216   for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
217     if (Phdr.p_type == ELF::PT_LOAD)
218       return (uint64_t)Phdr.p_vaddr;
219   return llvm::None;
220 }
221 
222 /// Determine the virtual address that is considered the base address of mach-o
223 /// object file.
224 ///
225 /// The base address of a mach-o file is the vmaddr of the  "__TEXT" segment.
226 ///
227 /// \param MachO A mach-o object file we will search.
228 ///
229 /// \returns A valid image base address if we are able to extract one.
230 static llvm::Optional<uint64_t>
getImageBaseAddress(const object::MachOObjectFile * MachO)231 getImageBaseAddress(const object::MachOObjectFile *MachO) {
232   for (const auto &Command : MachO->load_commands()) {
233     if (Command.C.cmd == MachO::LC_SEGMENT) {
234       MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
235       StringRef SegName = SLC.segname;
236       if (SegName == "__TEXT")
237         return SLC.vmaddr;
238     } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
239       MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
240       StringRef SegName = SLC.segname;
241       if (SegName == "__TEXT")
242         return SLC.vmaddr;
243     }
244   }
245   return llvm::None;
246 }
247 
248 /// Determine the virtual address that is considered the base address of an
249 /// object file.
250 ///
251 /// Since GSYM files are used for symbolication, many clients will need to
252 /// easily adjust addresses they find in stack traces so the lookups happen
253 /// on unslid addresses from the original object file. If the base address of
254 /// a GSYM file is set to the base address of the image, then this address
255 /// adjusting is much easier.
256 ///
257 /// \param Obj An object file we will search.
258 ///
259 /// \returns A valid image base address if we are able to extract one.
getImageBaseAddress(object::ObjectFile & Obj)260 static llvm::Optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
261   if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
262     return getImageBaseAddress(MachO);
263   else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
264     return getImageBaseAddress(ELFObj->getELFFile());
265   else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
266     return getImageBaseAddress(ELFObj->getELFFile());
267   else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
268     return getImageBaseAddress(ELFObj->getELFFile());
269   else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
270     return getImageBaseAddress(ELFObj->getELFFile());
271   return llvm::None;
272 }
273 
274 
handleObjectFile(ObjectFile & Obj,const std::string & OutFile)275 static llvm::Error handleObjectFile(ObjectFile &Obj,
276                                     const std::string &OutFile) {
277   auto ThreadCount =
278       NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
279   auto &OS = outs();
280 
281   GsymCreator Gsym;
282 
283   // See if we can figure out the base address for a given object file, and if
284   // we can, then set the base address to use to this value. This will ease
285   // symbolication since clients can slide the GSYM lookup addresses by using
286   // the load bias of the shared library.
287   if (auto ImageBaseAddr = getImageBaseAddress(Obj))
288     Gsym.setBaseAddress(*ImageBaseAddr);
289 
290   // We need to know where the valid sections are that contain instructions.
291   // See header documentation for DWARFTransformer::SetValidTextRanges() for
292   // defails.
293   AddressRanges TextRanges;
294   for (const object::SectionRef &Sect : Obj.sections()) {
295     if (!Sect.isText())
296       continue;
297     const uint64_t Size = Sect.getSize();
298     if (Size == 0)
299       continue;
300     const uint64_t StartAddr = Sect.getAddress();
301     TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
302   }
303 
304   // Make sure there is DWARF to convert first.
305   std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
306   if (!DICtx)
307     return createStringError(std::errc::invalid_argument,
308                              "unable to create DWARF context");
309   logAllUnhandledErrors(DICtx->loadRegisterInfo(Obj), OS,
310                         "DwarfTransformer: ");
311 
312   // Make a DWARF transformer object and populate the ranges of the code
313   // so we don't end up adding invalid functions to GSYM data.
314   DwarfTransformer DT(*DICtx, OS, Gsym);
315   if (!TextRanges.empty())
316     Gsym.SetValidTextRanges(TextRanges);
317 
318   // Convert all DWARF to GSYM.
319   if (auto Err = DT.convert(ThreadCount))
320     return Err;
321 
322   // Get the UUID and convert symbol table to GSYM.
323   if (auto Err = ObjectFileTransformer::convert(Obj, OS, Gsym))
324     return Err;
325 
326   // Finalize the GSYM to make it ready to save to disk. This will remove
327   // duplicate FunctionInfo entries where we might have found an entry from
328   // debug info and also a symbol table entry from the object file.
329   if (auto Err = Gsym.finalize(OS))
330     return Err;
331 
332   // Save the GSYM file to disk.
333   support::endianness Endian = Obj.makeTriple().isLittleEndian() ?
334       support::little : support::big;
335   if (auto Err = Gsym.save(OutFile.c_str(), Endian))
336     return Err;
337 
338   // Verify the DWARF if requested. This will ensure all the info in the DWARF
339   // can be looked up in the GSYM and that all lookups get matching data.
340   if (Verify) {
341     if (auto Err = DT.verify(OutFile))
342       return Err;
343   }
344 
345   return Error::success();
346 }
347 
handleBuffer(StringRef Filename,MemoryBufferRef Buffer,const std::string & OutFile)348 static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
349                                 const std::string &OutFile) {
350   Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
351   error(Filename, errorToErrorCode(BinOrErr.takeError()));
352 
353   if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
354     Triple ObjTriple(Obj->makeTriple());
355     auto ArchName = ObjTriple.getArchName();
356     outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
357     if (auto Err = handleObjectFile(*Obj, OutFile.c_str()))
358       return Err;
359   } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
360     // Iterate over all contained architectures and filter out any that were
361     // not specified with the "--arch <arch>" option. If the --arch option was
362     // not specified on the command line, we will process all architectures.
363     std::vector< std::unique_ptr<MachOObjectFile> > FilterObjs;
364     for (auto &ObjForArch : Fat->objects()) {
365       if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
366         auto &Obj = **MachOOrErr;
367         if (filterArch(Obj))
368           FilterObjs.emplace_back(MachOOrErr->release());
369       } else {
370         error(Filename, MachOOrErr.takeError());
371       }
372     }
373     if (FilterObjs.empty())
374       error(Filename, createStringError(std::errc::invalid_argument,
375                                         "no matching architectures found"));
376 
377     // Now handle each architecture we need to convert.
378     for (auto &Obj: FilterObjs) {
379       Triple ObjTriple(Obj->getArchTriple());
380       auto ArchName = ObjTriple.getArchName();
381       std::string ArchOutFile(OutFile);
382       // If we are only handling a single architecture, then we will use the
383       // normal output file. If we are handling multiple architectures append
384       // the architecture name to the end of the out file path so that we
385       // don't overwrite the previous architecture's gsym file.
386       if (FilterObjs.size() > 1) {
387         ArchOutFile.append(1, '.');
388         ArchOutFile.append(ArchName.str());
389       }
390       outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
391       if (auto Err = handleObjectFile(*Obj, ArchOutFile))
392         return Err;
393     }
394   }
395   return Error::success();
396 }
397 
handleFileConversionToGSYM(StringRef Filename,const std::string & OutFile)398 static llvm::Error handleFileConversionToGSYM(StringRef Filename,
399                                               const std::string &OutFile) {
400   ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
401       MemoryBuffer::getFileOrSTDIN(Filename);
402   error(Filename, BuffOrErr.getError());
403   std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
404   return handleBuffer(Filename, *Buffer, OutFile);
405 }
406 
convertFileToGSYM(raw_ostream & OS)407 static llvm::Error convertFileToGSYM(raw_ostream &OS) {
408   // Expand any .dSYM bundles to the individual object files contained therein.
409   std::vector<std::string> Objects;
410   std::string OutFile = OutputFilename;
411   if (OutFile.empty()) {
412     OutFile = ConvertFilename;
413     OutFile += ".gsym";
414   }
415 
416   OS << "Input file: " << ConvertFilename << "\n";
417 
418   auto Objs = expandBundle(ConvertFilename);
419   Objects.insert(Objects.end(), Objs.begin(), Objs.end());
420 
421   for (auto Object : Objects) {
422     if (auto Err = handleFileConversionToGSYM(Object, OutFile))
423       return Err;
424   }
425   return Error::success();
426 }
427 
main(int argc,char const * argv[])428 int main(int argc, char const *argv[]) {
429   // Print a stack trace if we signal out.
430   sys::PrintStackTraceOnErrorSignal(argv[0]);
431   PrettyStackTraceProgram X(argc, argv);
432   llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
433 
434   llvm::InitializeAllTargets();
435 
436   const char *Overview =
437       "A tool for dumping, searching and creating GSYM files.\n\n"
438       "Specify one or more GSYM paths as arguments to dump all of the "
439       "information in each GSYM file.\n"
440       "Specify a single GSYM file along with one or more --lookup options to "
441       "lookup addresses within that GSYM file.\n"
442       "Use the --convert option to specify a file with option --out-file "
443       "option to convert to GSYM format.\n";
444   HideUnrelatedOptions(
445       {&GeneralOptions, &ConversionOptions, &LookupOptions});
446   cl::ParseCommandLineOptions(argc, argv, Overview);
447 
448   if (Help) {
449     PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
450     return 0;
451   }
452 
453   raw_ostream &OS = outs();
454 
455   if (!ConvertFilename.empty()) {
456     // Convert DWARF to GSYM
457     if (!InputFilenames.empty()) {
458       OS << "error: no input files can be specified when using the --convert "
459             "option.\n";
460       return 1;
461     }
462     // Call error() if we have an error and it will exit with a status of 1
463     if (auto Err = convertFileToGSYM(OS))
464       error("DWARF conversion failed: ", std::move(Err));
465     return 0;
466   }
467 
468   // Dump or access data inside GSYM files
469   for (const auto &GSYMPath : InputFilenames) {
470     auto Gsym = GsymReader::openFile(GSYMPath);
471     if (!Gsym)
472       error(GSYMPath, Gsym.takeError());
473 
474     if (LookupAddresses.empty()) {
475       Gsym->dump(outs());
476       continue;
477     }
478 
479     // Lookup an address in a GSYM file and print any matches.
480     OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
481     for (auto Addr: LookupAddresses) {
482       if (auto Result = Gsym->lookup(Addr)) {
483         // If verbose is enabled dump the full function info for the address.
484         if (Verbose) {
485           if (auto FI = Gsym->getFunctionInfo(Addr)) {
486             OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
487             Gsym->dump(OS, *FI);
488             OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
489           }
490         }
491         OS << Result.get();
492       } else {
493         if (Verbose)
494           OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
495         OS << HEX64(Addr) << ": ";
496         logAllUnhandledErrors(Result.takeError(), OS, "error: ");
497       }
498       if (Verbose)
499         OS << "\n";
500     }
501   }
502   return EXIT_SUCCESS;
503 }
504