1 //===- dsymutil.cpp - Debug info dumping utility for llvm -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This program is a utility that aims to be a drop-in replacement for Darwin's
// dsymutil.
12 //===----------------------------------------------------------------------===//
13 
14 #include "dsymutil.h"
15 #include "BinaryHolder.h"
16 #include "CFBundle.h"
17 #include "DebugMap.h"
18 #include "LinkUtils.h"
19 #include "MachOUtils.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/ADT/Triple.h"
25 #include "llvm/DebugInfo/DIContext.h"
26 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
27 #include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
28 #include "llvm/Object/Binary.h"
29 #include "llvm/Object/MachO.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/InitLLVM.h"
33 #include "llvm/Support/ManagedStatic.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Support/TargetSelect.h"
36 #include "llvm/Support/ThreadPool.h"
37 #include "llvm/Support/WithColor.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Support/thread.h"
40 #include <algorithm>
41 #include <cstdint>
42 #include <cstdlib>
43 #include <string>
44 #include <system_error>
45 
46 using namespace llvm;
47 using namespace llvm::cl;
48 using namespace llvm::dsymutil;
49 using namespace object;
50 
51 static OptionCategory DsymCategory("Specific Options");
52 static opt<bool> Help("h", desc("Alias for -help"), Hidden);
53 static opt<bool> Version("v", desc("Alias for -version"), Hidden);
54 
55 static list<std::string> InputFiles(Positional, OneOrMore,
56                                     desc("<input files>"), cat(DsymCategory));
57 
58 static opt<std::string>
59     OutputFileOpt("o",
60                   desc("Specify the output file. default: <input file>.dwarf"),
61                   value_desc("filename"), cat(DsymCategory));
62 static alias OutputFileOptA("out", desc("Alias for -o"),
63                             aliasopt(OutputFileOpt));
64 
65 static opt<std::string> OsoPrependPath(
66     "oso-prepend-path",
67     desc("Specify a directory to prepend to the paths of object files."),
68     value_desc("path"), cat(DsymCategory));
69 
70 static opt<bool> Assembly(
71     "S",
72     desc("Output textual assembly instead of a binary dSYM companion file."),
73     init(false), cat(DsymCategory), cl::Hidden);
74 
75 static opt<bool> DumpStab(
76     "symtab",
77     desc("Dumps the symbol table found in executable or object file(s) and\n"
78          "exits."),
79     init(false), cat(DsymCategory));
80 static alias DumpStabA("s", desc("Alias for --symtab"), aliasopt(DumpStab));
81 
82 static opt<bool> FlatOut("flat",
83                          desc("Produce a flat dSYM file (not a bundle)."),
84                          init(false), cat(DsymCategory));
85 static alias FlatOutA("f", desc("Alias for --flat"), aliasopt(FlatOut));
86 
87 static opt<bool> Minimize(
88     "minimize",
89     desc("When used when creating a dSYM file with Apple accelerator tables,\n"
90          "this option will suppress the emission of the .debug_inlines, \n"
91          ".debug_pubnames, and .debug_pubtypes sections since dsymutil \n"
92          "has better equivalents: .apple_names and .apple_types. When used in\n"
93          "conjunction with --update option, this option will cause redundant\n"
94          "accelerator tables to be removed."),
95     init(false), cat(DsymCategory));
96 static alias MinimizeA("z", desc("Alias for --minimize"), aliasopt(Minimize));
97 
98 static opt<bool> Update(
99     "update",
100     desc("Updates existing dSYM files to contain the latest accelerator\n"
101          "tables and other DWARF optimizations."),
102     init(false), cat(DsymCategory));
103 static alias UpdateA("u", desc("Alias for --update"), aliasopt(Update));
104 
105 static opt<std::string> SymbolMap(
106     "symbol-map",
107     desc("Updates the existing dSYMs inplace using symbol map specified."),
108     value_desc("bcsymbolmap"), cat(DsymCategory));
109 
110 static cl::opt<AccelTableKind> AcceleratorTable(
111     "accelerator", cl::desc("Output accelerator tables."),
112     cl::values(clEnumValN(AccelTableKind::Default, "Default",
113                           "Default for input."),
114                clEnumValN(AccelTableKind::Apple, "Apple", "Apple"),
115                clEnumValN(AccelTableKind::Dwarf, "Dwarf", "DWARF")),
116     cl::init(AccelTableKind::Default), cat(DsymCategory));
117 
118 static opt<unsigned> NumThreads(
119     "num-threads",
120     desc("Specifies the maximum number (n) of simultaneous threads to use\n"
121          "when linking multiple architectures."),
122     value_desc("n"), init(0), cat(DsymCategory));
123 static alias NumThreadsA("j", desc("Alias for --num-threads"),
124                          aliasopt(NumThreads));
125 
126 static opt<bool> Verbose("verbose", desc("Verbosity level"), init(false),
127                          cat(DsymCategory));
128 
129 static opt<bool>
130     NoOutput("no-output",
131              desc("Do the link in memory, but do not emit the result file."),
132              init(false), cat(DsymCategory));
133 
134 static opt<bool>
135     NoTimestamp("no-swiftmodule-timestamp",
136                 desc("Don't check timestamp for swiftmodule files."),
137                 init(false), cat(DsymCategory));
138 
139 static list<std::string> ArchFlags(
140     "arch",
141     desc("Link DWARF debug information only for specified CPU architecture\n"
142          "types. This option can be specified multiple times, once for each\n"
143          "desired architecture. All CPU architectures will be linked by\n"
144          "default."),
145     value_desc("arch"), ZeroOrMore, cat(DsymCategory));
146 
147 static opt<bool>
148     NoODR("no-odr",
149           desc("Do not use ODR (One Definition Rule) for type uniquing."),
150           init(false), cat(DsymCategory));
151 
152 static opt<bool> DumpDebugMap(
153     "dump-debug-map",
154     desc("Parse and dump the debug map to standard output. Not DWARF link "
155          "will take place."),
156     init(false), cat(DsymCategory));
157 
158 static opt<bool> InputIsYAMLDebugMap(
159     "y", desc("Treat the input file is a YAML debug map rather than a binary."),
160     init(false), cat(DsymCategory));
161 
162 static opt<bool> Verify("verify", desc("Verify the linked DWARF debug info."),
163                         cat(DsymCategory));
164 
165 static opt<std::string>
166     Toolchain("toolchain", desc("Embed toolchain information in dSYM bundle."),
167               cat(DsymCategory));
168 
169 static opt<bool>
170     PaperTrailWarnings("papertrail",
171                        desc("Embed warnings in the linked DWARF debug info."),
172                        cat(DsymCategory));
173 
createPlistFile(llvm::StringRef Bin,llvm::StringRef BundleRoot)174 static Error createPlistFile(llvm::StringRef Bin, llvm::StringRef BundleRoot) {
175   if (NoOutput)
176     return Error::success();
177 
178   // Create plist file to write to.
179   llvm::SmallString<128> InfoPlist(BundleRoot);
180   llvm::sys::path::append(InfoPlist, "Contents/Info.plist");
181   std::error_code EC;
182   llvm::raw_fd_ostream PL(InfoPlist, EC, llvm::sys::fs::F_Text);
183   if (EC)
184     return make_error<StringError>(
185         "cannot create Plist: " + toString(errorCodeToError(EC)), EC);
186 
187   CFBundleInfo BI = getBundleInfo(Bin);
188 
189   if (BI.IDStr.empty()) {
190     llvm::StringRef BundleID = *llvm::sys::path::rbegin(BundleRoot);
191     if (llvm::sys::path::extension(BundleRoot) == ".dSYM")
192       BI.IDStr = llvm::sys::path::stem(BundleID);
193     else
194       BI.IDStr = BundleID;
195   }
196 
197   // Print out information to the plist file.
198   PL << "<?xml version=\"1.0\" encoding=\"UTF-8\"\?>\n"
199      << "<!DOCTYPE plist PUBLIC \"-//Apple Computer//DTD PLIST 1.0//EN\" "
200      << "\"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\n"
201      << "<plist version=\"1.0\">\n"
202      << "\t<dict>\n"
203      << "\t\t<key>CFBundleDevelopmentRegion</key>\n"
204      << "\t\t<string>English</string>\n"
205      << "\t\t<key>CFBundleIdentifier</key>\n"
206      << "\t\t<string>com.apple.xcode.dsym." << BI.IDStr << "</string>\n"
207      << "\t\t<key>CFBundleInfoDictionaryVersion</key>\n"
208      << "\t\t<string>6.0</string>\n"
209      << "\t\t<key>CFBundlePackageType</key>\n"
210      << "\t\t<string>dSYM</string>\n"
211      << "\t\t<key>CFBundleSignature</key>\n"
212      << "\t\t<string>\?\?\?\?</string>\n";
213 
214   if (!BI.OmitShortVersion()) {
215     PL << "\t\t<key>CFBundleShortVersionString</key>\n";
216     PL << "\t\t<string>";
217     printHTMLEscaped(BI.ShortVersionStr, PL);
218     PL << "</string>\n";
219   }
220 
221   PL << "\t\t<key>CFBundleVersion</key>\n";
222   PL << "\t\t<string>";
223   printHTMLEscaped(BI.VersionStr, PL);
224   PL << "</string>\n";
225 
226   if (!Toolchain.empty()) {
227     PL << "\t\t<key>Toolchain</key>\n";
228     PL << "\t\t<string>";
229     printHTMLEscaped(Toolchain, PL);
230     PL << "</string>\n";
231   }
232 
233   PL << "\t</dict>\n"
234      << "</plist>\n";
235 
236   PL.close();
237   return Error::success();
238 }
239 
createBundleDir(llvm::StringRef BundleBase)240 static Error createBundleDir(llvm::StringRef BundleBase) {
241   if (NoOutput)
242     return Error::success();
243 
244   llvm::SmallString<128> Bundle(BundleBase);
245   llvm::sys::path::append(Bundle, "Contents", "Resources", "DWARF");
246   if (std::error_code EC =
247           create_directories(Bundle.str(), true, llvm::sys::fs::perms::all_all))
248     return make_error<StringError>(
249         "cannot create bundle: " + toString(errorCodeToError(EC)), EC);
250 
251   return Error::success();
252 }
253 
verify(llvm::StringRef OutputFile,llvm::StringRef Arch)254 static bool verify(llvm::StringRef OutputFile, llvm::StringRef Arch) {
255   if (OutputFile == "-") {
256     WithColor::warning() << "verification skipped for " << Arch
257                          << "because writing to stdout.\n";
258     return true;
259   }
260 
261   Expected<OwningBinary<Binary>> BinOrErr = createBinary(OutputFile);
262   if (!BinOrErr) {
263     WithColor::error() << OutputFile << ": " << toString(BinOrErr.takeError());
264     return false;
265   }
266 
267   Binary &Binary = *BinOrErr.get().getBinary();
268   if (auto *Obj = dyn_cast<MachOObjectFile>(&Binary)) {
269     raw_ostream &os = Verbose ? errs() : nulls();
270     os << "Verifying DWARF for architecture: " << Arch << "\n";
271     std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(*Obj);
272     DIDumpOptions DumpOpts;
273     bool success = DICtx->verify(os, DumpOpts.noImplicitRecursion());
274     if (!success)
275       WithColor::error() << "verification failed for " << Arch << '\n';
276     return success;
277   }
278 
279   return false;
280 }
281 
getOutputFileName(llvm::StringRef InputFile)282 static Expected<std::string> getOutputFileName(llvm::StringRef InputFile) {
283   if (OutputFileOpt == "-")
284     return OutputFileOpt;
285 
286   // When updating, do in place replacement.
287   if (OutputFileOpt.empty() && (Update || !SymbolMap.empty()))
288     return InputFile;
289 
290   // If a flat dSYM has been requested, things are pretty simple.
291   if (FlatOut) {
292     if (OutputFileOpt.empty()) {
293       if (InputFile == "-")
294         return "a.out.dwarf";
295       return (InputFile + ".dwarf").str();
296     }
297 
298     return OutputFileOpt;
299   }
300 
301   // We need to create/update a dSYM bundle.
302   // A bundle hierarchy looks like this:
303   //   <bundle name>.dSYM/
304   //       Contents/
305   //          Info.plist
306   //          Resources/
307   //             DWARF/
308   //                <DWARF file(s)>
309   std::string DwarfFile =
310       InputFile == "-" ? llvm::StringRef("a.out") : InputFile;
311   llvm::SmallString<128> BundleDir(OutputFileOpt);
312   if (BundleDir.empty())
313     BundleDir = DwarfFile + ".dSYM";
314   if (auto E = createBundleDir(BundleDir))
315     return std::move(E);
316   if (auto E = createPlistFile(DwarfFile, BundleDir))
317     return std::move(E);
318 
319   llvm::sys::path::append(BundleDir, "Contents", "Resources", "DWARF",
320                           llvm::sys::path::filename(DwarfFile));
321   return BundleDir.str();
322 }
323 
324 /// Parses the command line options into the LinkOptions struct and performs
325 /// some sanity checking. Returns an error in case the latter fails.
getOptions()326 static Expected<LinkOptions> getOptions() {
327   LinkOptions Options;
328 
329   Options.Verbose = Verbose;
330   Options.NoOutput = NoOutput;
331   Options.NoODR = NoODR;
332   Options.Minimize = Minimize;
333   Options.Update = Update;
334   Options.NoTimestamp = NoTimestamp;
335   Options.PrependPath = OsoPrependPath;
336   Options.TheAccelTableKind = AcceleratorTable;
337 
338   if (!SymbolMap.empty())
339     Options.Update = true;
340 
341   if (Assembly)
342     Options.FileType = OutputFileType::Assembly;
343 
344   if (Options.Update && std::find(InputFiles.begin(), InputFiles.end(), "-") !=
345                             InputFiles.end()) {
346     // FIXME: We cannot use stdin for an update because stdin will be
347     // consumed by the BinaryHolder during the debugmap parsing, and
348     // then we will want to consume it again in DwarfLinker. If we
349     // used a unique BinaryHolder object that could cache multiple
350     // binaries this restriction would go away.
351     return make_error<StringError>(
352         "standard input cannot be used as input for a dSYM update.",
353         inconvertibleErrorCode());
354   }
355 
356   if (NumThreads == 0)
357     Options.Threads = llvm::thread::hardware_concurrency();
358   if (DumpDebugMap || Verbose)
359     Options.Threads = 1;
360 
361   return Options;
362 }
363 
364 /// Return a list of input files. This function has logic for dealing with the
365 /// special case where we might have dSYM bundles as input. The function
366 /// returns an error when the directory structure doesn't match that of a dSYM
367 /// bundle.
getInputs(bool DsymAsInput)368 static Expected<std::vector<std::string>> getInputs(bool DsymAsInput) {
369   if (!DsymAsInput)
370     return InputFiles;
371 
372   // If we are updating, we might get dSYM bundles as input.
373   std::vector<std::string> Inputs;
374   for (const auto &Input : InputFiles) {
375     if (!llvm::sys::fs::is_directory(Input)) {
376       Inputs.push_back(Input);
377       continue;
378     }
379 
380     // Make sure that we're dealing with a dSYM bundle.
381     SmallString<256> BundlePath(Input);
382     sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
383     if (!llvm::sys::fs::is_directory(BundlePath))
384       return make_error<StringError>(
385           Input + " is a directory, but doesn't look like a dSYM bundle.",
386           inconvertibleErrorCode());
387 
388     // Create a directory iterator to iterate over all the entries in the
389     // bundle.
390     std::error_code EC;
391     llvm::sys::fs::directory_iterator DirIt(BundlePath, EC);
392     llvm::sys::fs::directory_iterator DirEnd;
393     if (EC)
394       return errorCodeToError(EC);
395 
396     // Add each entry to the list of inputs.
397     while (DirIt != DirEnd) {
398       Inputs.push_back(DirIt->path());
399       DirIt.increment(EC);
400       if (EC)
401         return errorCodeToError(EC);
402     }
403   }
404   return Inputs;
405 }
406 
main(int argc,char ** argv)407 int main(int argc, char **argv) {
408   InitLLVM X(argc, argv);
409 
410   void *P = (void *)(intptr_t)getOutputFileName;
411   std::string SDKPath = llvm::sys::fs::getMainExecutable(argv[0], P);
412   SDKPath = llvm::sys::path::parent_path(SDKPath);
413 
414   HideUnrelatedOptions({&DsymCategory, &ColorCategory});
415   llvm::cl::ParseCommandLineOptions(
416       argc, argv,
417       "manipulate archived DWARF debug symbol files.\n\n"
418       "dsymutil links the DWARF debug information found in the object files\n"
419       "for the executable <input file> by using debug symbols information\n"
420       "contained in its symbol table.\n");
421 
422   if (Help) {
423     PrintHelpMessage();
424     return 0;
425   }
426 
427   if (Version) {
428     llvm::cl::PrintVersionMessage();
429     return 0;
430   }
431 
432   auto OptionsOrErr = getOptions();
433   if (!OptionsOrErr) {
434     WithColor::error() << toString(OptionsOrErr.takeError());
435     return 1;
436   }
437 
438   llvm::InitializeAllTargetInfos();
439   llvm::InitializeAllTargetMCs();
440   llvm::InitializeAllTargets();
441   llvm::InitializeAllAsmPrinters();
442 
443   auto InputsOrErr = getInputs(OptionsOrErr->Update);
444   if (!InputsOrErr) {
445     WithColor::error() << toString(InputsOrErr.takeError()) << '\n';
446     return 1;
447   }
448 
449   if (!FlatOut && OutputFileOpt == "-") {
450     WithColor::error() << "cannot emit to standard output without --flat\n";
451     return 1;
452   }
453 
454   if (InputsOrErr->size() > 1 && FlatOut && !OutputFileOpt.empty()) {
455     WithColor::error() << "cannot use -o with multiple inputs in flat mode\n";
456     return 1;
457   }
458 
459   if (InputFiles.size() > 1 && !SymbolMap.empty() &&
460       !llvm::sys::fs::is_directory(SymbolMap)) {
461     WithColor::error() << "when unobfuscating multiple files, --symbol-map "
462                        << "needs to point to a directory.\n";
463     return 1;
464   }
465 
466   if (getenv("RC_DEBUG_OPTIONS"))
467     PaperTrailWarnings = true;
468 
469   if (PaperTrailWarnings && InputIsYAMLDebugMap)
470     WithColor::warning()
471         << "Paper trail warnings are not supported for YAML input";
472 
473   for (const auto &Arch : ArchFlags)
474     if (Arch != "*" && Arch != "all" &&
475         !llvm::object::MachOObjectFile::isValidArch(Arch)) {
476       WithColor::error() << "unsupported cpu architecture: '" << Arch << "'\n";
477       return 1;
478     }
479 
480   SymbolMapLoader SymMapLoader(SymbolMap);
481 
482   for (auto &InputFile : *InputsOrErr) {
483     // Dump the symbol table for each input file and requested arch
484     if (DumpStab) {
485       if (!dumpStab(InputFile, ArchFlags, OsoPrependPath))
486         return 1;
487       continue;
488     }
489 
490     auto DebugMapPtrsOrErr =
491         parseDebugMap(InputFile, ArchFlags, OsoPrependPath, PaperTrailWarnings,
492                       Verbose, InputIsYAMLDebugMap);
493 
494     if (auto EC = DebugMapPtrsOrErr.getError()) {
495       WithColor::error() << "cannot parse the debug map for '" << InputFile
496                          << "': " << EC.message() << '\n';
497       return 1;
498     }
499 
500     if (OptionsOrErr->Update) {
501       // The debug map should be empty. Add one object file corresponding to
502       // the input file.
503       for (auto &Map : *DebugMapPtrsOrErr)
504         Map->addDebugMapObject(InputFile,
505                                llvm::sys::TimePoint<std::chrono::seconds>());
506     }
507 
508     // Ensure that the debug map is not empty (anymore).
509     if (DebugMapPtrsOrErr->empty()) {
510       WithColor::error() << "no architecture to link\n";
511       return 1;
512     }
513 
514     // Shared a single binary holder for all the link steps.
515     BinaryHolder BinHolder;
516 
517     NumThreads =
518         std::min<unsigned>(OptionsOrErr->Threads, DebugMapPtrsOrErr->size());
519     llvm::ThreadPool Threads(NumThreads);
520 
521     // If there is more than one link to execute, we need to generate
522     // temporary files.
523     bool NeedsTempFiles =
524         !DumpDebugMap && (OutputFileOpt != "-") &&
525         (DebugMapPtrsOrErr->size() != 1 || OptionsOrErr->Update);
526 
527     llvm::SmallVector<MachOUtils::ArchAndFile, 4> TempFiles;
528     std::atomic_char AllOK(1);
529     for (auto &Map : *DebugMapPtrsOrErr) {
530       if (Verbose || DumpDebugMap)
531         Map->print(llvm::outs());
532 
533       if (DumpDebugMap)
534         continue;
535 
536       if (!SymbolMap.empty())
537         OptionsOrErr->Translator = SymMapLoader.Load(InputFile, *Map);
538 
539       if (Map->begin() == Map->end())
540         WithColor::warning()
541             << "no debug symbols in executable (-arch "
542             << MachOUtils::getArchName(Map->getTriple().getArchName()) << ")\n";
543 
544       // Using a std::shared_ptr rather than std::unique_ptr because move-only
545       // types don't work with std::bind in the ThreadPool implementation.
546       std::shared_ptr<raw_fd_ostream> OS;
547 
548       Expected<std::string> OutputFileOrErr = getOutputFileName(InputFile);
549       if (!OutputFileOrErr) {
550         WithColor::error() << toString(OutputFileOrErr.takeError());
551         return 1;
552       }
553 
554       std::string OutputFile = *OutputFileOrErr;
555       if (NeedsTempFiles) {
556         TempFiles.emplace_back(Map->getTriple().getArchName().str());
557 
558         auto E = TempFiles.back().createTempFile();
559         if (E) {
560           WithColor::error() << toString(std::move(E));
561           return 1;
562         }
563 
564         auto &TempFile = *(TempFiles.back().File);
565         OS = std::make_shared<raw_fd_ostream>(TempFile.FD,
566                                               /*shouldClose*/ false);
567         OutputFile = TempFile.TmpName;
568       } else {
569         std::error_code EC;
570         OS = std::make_shared<raw_fd_ostream>(NoOutput ? "-" : OutputFile, EC,
571                                               sys::fs::F_None);
572         if (EC) {
573           WithColor::error() << OutputFile << ": " << EC.message();
574           return 1;
575         }
576       }
577 
578       auto LinkLambda = [&,
579                          OutputFile](std::shared_ptr<raw_fd_ostream> Stream) {
580         AllOK.fetch_and(linkDwarf(*Stream, BinHolder, *Map, *OptionsOrErr));
581         Stream->flush();
582         if (Verify && !NoOutput)
583           AllOK.fetch_and(verify(OutputFile, Map->getTriple().getArchName()));
584       };
585 
586       // FIXME: The DwarfLinker can have some very deep recursion that can max
587       // out the (significantly smaller) stack when using threads. We don't
588       // want this limitation when we only have a single thread.
589       if (NumThreads == 1)
590         LinkLambda(OS);
591       else
592         Threads.async(LinkLambda, OS);
593     }
594 
595     Threads.wait();
596 
597     if (!AllOK)
598       return 1;
599 
600     if (NeedsTempFiles) {
601       Expected<std::string> OutputFileOrErr = getOutputFileName(InputFile);
602       if (!OutputFileOrErr) {
603         WithColor::error() << toString(OutputFileOrErr.takeError());
604         return 1;
605       }
606       if (!MachOUtils::generateUniversalBinary(TempFiles, *OutputFileOrErr,
607                                                *OptionsOrErr, SDKPath))
608         return 1;
609     }
610   }
611 
612   return 0;
613 }
614