1 //===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This program is a utility that works like binutils "objdump", that is, it
10 // dumps out a plethora of information about an object file depending on the
11 // flags.
12 //
13 // The flags and output of this program should be near identical to those of
14 // binutils objdump.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm-objdump.h"
19 #include "COFFDump.h"
20 #include "ELFDump.h"
21 #include "MachODump.h"
22 #include "ObjdumpOptID.h"
23 #include "OffloadDump.h"
24 #include "SourcePrinter.h"
25 #include "WasmDump.h"
26 #include "XCOFFDump.h"
27 #include "llvm/ADT/IndexedMap.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/ADT/StringSet.h"
33 #include "llvm/ADT/Triple.h"
34 #include "llvm/ADT/Twine.h"
35 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
36 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
37 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
38 #include "llvm/Debuginfod/BuildIDFetcher.h"
39 #include "llvm/Debuginfod/Debuginfod.h"
40 #include "llvm/Debuginfod/HTTPClient.h"
41 #include "llvm/Demangle/Demangle.h"
42 #include "llvm/MC/MCAsmInfo.h"
43 #include "llvm/MC/MCContext.h"
44 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
45 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
46 #include "llvm/MC/MCInst.h"
47 #include "llvm/MC/MCInstPrinter.h"
48 #include "llvm/MC/MCInstrAnalysis.h"
49 #include "llvm/MC/MCInstrInfo.h"
50 #include "llvm/MC/MCObjectFileInfo.h"
51 #include "llvm/MC/MCRegisterInfo.h"
52 #include "llvm/MC/MCSubtargetInfo.h"
53 #include "llvm/MC/MCTargetOptions.h"
54 #include "llvm/MC/TargetRegistry.h"
55 #include "llvm/Object/Archive.h"
56 #include "llvm/Object/BuildID.h"
57 #include "llvm/Object/COFF.h"
58 #include "llvm/Object/COFFImportFile.h"
59 #include "llvm/Object/ELFObjectFile.h"
60 #include "llvm/Object/ELFTypes.h"
61 #include "llvm/Object/FaultMapParser.h"
62 #include "llvm/Object/MachO.h"
63 #include "llvm/Object/MachOUniversal.h"
64 #include "llvm/Object/ObjectFile.h"
65 #include "llvm/Object/OffloadBinary.h"
66 #include "llvm/Object/Wasm.h"
67 #include "llvm/Option/Arg.h"
68 #include "llvm/Option/ArgList.h"
69 #include "llvm/Option/Option.h"
70 #include "llvm/Support/Casting.h"
71 #include "llvm/Support/Debug.h"
72 #include "llvm/Support/Errc.h"
73 #include "llvm/Support/FileSystem.h"
74 #include "llvm/Support/Format.h"
75 #include "llvm/Support/FormatVariadic.h"
76 #include "llvm/Support/GraphWriter.h"
77 #include "llvm/Support/Host.h"
78 #include "llvm/Support/InitLLVM.h"
79 #include "llvm/Support/MemoryBuffer.h"
80 #include "llvm/Support/SourceMgr.h"
81 #include "llvm/Support/StringSaver.h"
82 #include "llvm/Support/TargetSelect.h"
83 #include "llvm/Support/WithColor.h"
84 #include "llvm/Support/raw_ostream.h"
85 #include <algorithm>
86 #include <cctype>
87 #include <cstring>
88 #include <optional>
89 #include <system_error>
90 #include <unordered_map>
91 #include <utility>
92 
93 using namespace llvm;
94 using namespace llvm::object;
95 using namespace llvm::objdump;
96 using namespace llvm::opt;
97 
98 namespace {
99 
100 class CommonOptTable : public opt::GenericOptTable {
101 public:
CommonOptTable(ArrayRef<Info> OptionInfos,const char * Usage,const char * Description)102   CommonOptTable(ArrayRef<Info> OptionInfos, const char *Usage,
103                  const char *Description)
104       : opt::GenericOptTable(OptionInfos), Usage(Usage),
105         Description(Description) {
106     setGroupedShortOptions(true);
107   }
108 
printHelp(StringRef Argv0,bool ShowHidden=false) const109   void printHelp(StringRef Argv0, bool ShowHidden = false) const {
110     Argv0 = sys::path::filename(Argv0);
111     opt::GenericOptTable::printHelp(outs(), (Argv0 + Usage).str().c_str(),
112                                     Description, ShowHidden, ShowHidden);
113     // TODO Replace this with OptTable API once it adds extrahelp support.
114     outs() << "\nPass @FILE as argument to read options from FILE.\n";
115   }
116 
117 private:
118   const char *Usage;
119   const char *Description;
120 };
121 
// ObjdumpOptID is in ObjdumpOptID.h
//
// Everything in this namespace is produced by including ObjdumpOpts.inc twice
// with different macro definitions (X-macro style):
//   1. PREFIX(NAME, VALUE) defines a constexpr StringLiteral array
//      `NAME_init` initialized from VALUE, plus a constexpr ArrayRef `NAME`
//      that views every element of that array except the last one.
//      NOTE(review): presumably the last element is a terminating sentinel in
//      the generated file - confirm against ObjdumpOpts.inc.
//   2. OPTION(...) turns each option record into one OptTable::Info
//      initializer, prepending OBJDUMP_ to the ID, GROUP and ALIAS fields.
namespace objdump_opt {
#define PREFIX(NAME, VALUE)                                                    \
  static constexpr StringLiteral NAME##_init[] = VALUE;                        \
  static constexpr ArrayRef<StringLiteral> NAME(NAME##_init,                   \
                                                std::size(NAME##_init) - 1);
#include "ObjdumpOpts.inc"
#undef PREFIX

// One Info entry per OPTION record in ObjdumpOpts.inc.
static constexpr opt::OptTable::Info ObjdumpInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  {PREFIX,          NAME,         HELPTEXT,                                    \
   METAVAR,         OBJDUMP_##ID, opt::Option::KIND##Class,                    \
   PARAM,           FLAGS,        OBJDUMP_##GROUP,                             \
   OBJDUMP_##ALIAS, ALIASARGS,    VALUES},
#include "ObjdumpOpts.inc"
#undef OPTION
};
} // namespace objdump_opt
142 
143 class ObjdumpOptTable : public CommonOptTable {
144 public:
ObjdumpOptTable()145   ObjdumpOptTable()
146       : CommonOptTable(objdump_opt::ObjdumpInfoTable,
147                        " [options] <input object files>",
148                        "llvm object file dumper") {}
149 };
150 
// Option identifiers for the otool-style command line. Each OPTION record in
// OtoolOpts.inc contributes one enumerator named OTOOL_<ID>; the enumerators
// follow OTOOL_INVALID in the order the records appear in that file.
enum OtoolOptID {
  OTOOL_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  OTOOL_##ID,
#include "OtoolOpts.inc"
#undef OPTION
};
159 
// Tables for the otool-style command line, generated by including
// OtoolOpts.inc twice with different macro definitions:
//   1. PREFIX(NAME, VALUE) defines a constexpr StringLiteral array
//      `NAME_init` initialized from VALUE, plus a constexpr ArrayRef `NAME`
//      that views every element of that array except the last one.
//      NOTE(review): presumably the last element is a terminating sentinel in
//      the generated file - confirm against OtoolOpts.inc.
//   2. OPTION(...) turns each option record into one OptTable::Info
//      initializer, prepending OTOOL_ to the ID, GROUP and ALIAS fields.
namespace otool {
#define PREFIX(NAME, VALUE)                                                    \
  static constexpr StringLiteral NAME##_init[] = VALUE;                        \
  static constexpr ArrayRef<StringLiteral> NAME(NAME##_init,                   \
                                                std::size(NAME##_init) - 1);
#include "OtoolOpts.inc"
#undef PREFIX

// One Info entry per OPTION record in OtoolOpts.inc.
static constexpr opt::OptTable::Info OtoolInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  {PREFIX,        NAME,       HELPTEXT,                                        \
   METAVAR,       OTOOL_##ID, opt::Option::KIND##Class,                        \
   PARAM,         FLAGS,      OTOOL_##GROUP,                                   \
   OTOOL_##ALIAS, ALIASARGS,  VALUES},
#include "OtoolOpts.inc"
#undef OPTION
};
} // namespace otool
179 
180 class OtoolOptTable : public CommonOptTable {
181 public:
OtoolOptTable()182   OtoolOptTable()
183       : CommonOptTable(otool::OtoolInfoTable, " [option...] [file...]",
184                        "Mach-O object file displaying tool") {}
185 };
186 
187 } // namespace
188 
#define DEBUG_TYPE "objdump"

// Tool-wide state.
//
// Definitions qualified with `objdump::` give storage to variables that
// belong to namespace llvm::objdump; `static` ones are private to this file.
// Variables without an initializer have static storage duration and therefore
// start out zero-initialized (false / 0 / empty).
// NOTE(review): these look like the parsed values of the command-line
// options, filled in elsewhere in this file - confirm against the option
// parsing code.

static uint64_t AdjustVMA;
static bool AllHeaders;
static std::string ArchName; // Passed to TargetRegistry::lookupTarget().
bool objdump::ArchiveHeaders;
bool objdump::Demangle;
bool objdump::Disassemble;
bool objdump::DisassembleAll;
bool objdump::SymbolDescription;
static std::vector<std::string> DisassembleSymbols;
static bool DisassembleZeroes;
static std::vector<std::string> DisassemblerOptions;
DIDumpType objdump::DwarfDumpType;
static bool DynamicRelocations;
static bool FaultMapSection;
static bool FileHeaders;
bool objdump::SectionContents;
static std::vector<std::string> InputFilenames;
bool objdump::PrintLines; // With PrintSource: gates printSourceLine() calls.
static bool MachOOpt;
std::string objdump::MCPU;
std::vector<std::string> objdump::MAttrs;
bool objdump::ShowRawInsn; // Printers emit the raw instruction bytes if set.
bool objdump::LeadingAddr; // Printers emit the instruction address if set.
static bool Offloading;
static bool RawClangAST;
bool objdump::Relocations;
bool objdump::PrintImmHex;
bool objdump::PrivateHeaders;
// Section names to keep; an empty list disables filtering (see
// checkSectionFilter()).
std::vector<std::string> objdump::FilterSections;
bool objdump::SectionHeaders;
static bool ShowAllSymbols;
static bool ShowLMA;
bool objdump::PrintSource;

static uint64_t StartAddress;
static bool HasStartAddressFlag;
// Defaults to the largest representable address.
static uint64_t StopAddress = UINT64_MAX;
static bool HasStopAddressFlag;

bool objdump::SymbolTable;
static bool SymbolizeOperands;
static bool DynamicSymbolTable;
// Read, and overwritten with the resolved triple, by getTarget().
std::string objdump::TripleName;
bool objdump::UnwindInfo;
static bool Wide;
std::string objdump::Prefix;
uint32_t objdump::PrefixStrip;

DebugVarsFormat objdump::DbgVariables = DVDisabled;

int objdump::DbgIndent = 52;

static StringSet<> DisasmSymbolSet;
// Non-empty section names seen by checkSectionFilter(); consulted by
// warnOnNoMatchForSections().
StringSet<> objdump::FoundSectionSet;
// Prefix used for every warning/error message printed by this file.
static StringRef ToolName;

std::unique_ptr<BuildIDFetcher> BIDFetcher;
ExitOnError ExitOnErr;
249 
namespace {
// Outcome of testing one section against the section filter. The two fields
// are independent: a section can be dropped from the output while still
// advancing the running section index.
struct FilterResult {
  // True if the section should not be skipped.
  bool Keep;

  // True if the index counter should be incremented, even if the section should
  // be skipped. For example, sections may be skipped if they are not included
  // in the --section flag, but we still want those to count toward the section
  // count.
  bool IncrementIndex;
};
} // namespace
262 
checkSectionFilter(object::SectionRef S)263 static FilterResult checkSectionFilter(object::SectionRef S) {
264   if (FilterSections.empty())
265     return {/*Keep=*/true, /*IncrementIndex=*/true};
266 
267   Expected<StringRef> SecNameOrErr = S.getName();
268   if (!SecNameOrErr) {
269     consumeError(SecNameOrErr.takeError());
270     return {/*Keep=*/false, /*IncrementIndex=*/false};
271   }
272   StringRef SecName = *SecNameOrErr;
273 
274   // StringSet does not allow empty key so avoid adding sections with
275   // no name (such as the section with index 0) here.
276   if (!SecName.empty())
277     FoundSectionSet.insert(SecName);
278 
279   // Only show the section if it's in the FilterSections list, but always
280   // increment so the indexing is stable.
281   return {/*Keep=*/is_contained(FilterSections, SecName),
282           /*IncrementIndex=*/true};
283 }
284 
ToolSectionFilter(object::ObjectFile const & O,uint64_t * Idx)285 SectionFilter objdump::ToolSectionFilter(object::ObjectFile const &O,
286                                          uint64_t *Idx) {
287   // Start at UINT64_MAX so that the first index returned after an increment is
288   // zero (after the unsigned wrap).
289   if (Idx)
290     *Idx = UINT64_MAX;
291   return SectionFilter(
292       [Idx](object::SectionRef S) {
293         FilterResult Result = checkSectionFilter(S);
294         if (Idx != nullptr && Result.IncrementIndex)
295           *Idx += 1;
296         return Result.Keep;
297       },
298       O);
299 }
300 
getFileNameForError(const object::Archive::Child & C,unsigned Index)301 std::string objdump::getFileNameForError(const object::Archive::Child &C,
302                                          unsigned Index) {
303   Expected<StringRef> NameOrErr = C.getName();
304   if (NameOrErr)
305     return std::string(NameOrErr.get());
306   // If we have an error getting the name then we print the index of the archive
307   // member. Since we are already in an error state, we just ignore this error.
308   consumeError(NameOrErr.takeError());
309   return "<file index: " + std::to_string(Index) + ">";
310 }
311 
reportWarning(const Twine & Message,StringRef File)312 void objdump::reportWarning(const Twine &Message, StringRef File) {
313   // Output order between errs() and outs() matters especially for archive
314   // files where the output is per member object.
315   outs().flush();
316   WithColor::warning(errs(), ToolName)
317       << "'" << File << "': " << Message << "\n";
318 }
319 
reportError(StringRef File,const Twine & Message)320 [[noreturn]] void objdump::reportError(StringRef File, const Twine &Message) {
321   outs().flush();
322   WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n";
323   exit(1);
324 }
325 
reportError(Error E,StringRef FileName,StringRef ArchiveName,StringRef ArchitectureName)326 [[noreturn]] void objdump::reportError(Error E, StringRef FileName,
327                                        StringRef ArchiveName,
328                                        StringRef ArchitectureName) {
329   assert(E);
330   outs().flush();
331   WithColor::error(errs(), ToolName);
332   if (ArchiveName != "")
333     errs() << ArchiveName << "(" << FileName << ")";
334   else
335     errs() << "'" << FileName << "'";
336   if (!ArchitectureName.empty())
337     errs() << " (for architecture " << ArchitectureName << ")";
338   errs() << ": ";
339   logAllUnhandledErrors(std::move(E), errs());
340   exit(1);
341 }
342 
reportCmdLineWarning(const Twine & Message)343 static void reportCmdLineWarning(const Twine &Message) {
344   WithColor::warning(errs(), ToolName) << Message << "\n";
345 }
346 
reportCmdLineError(const Twine & Message)347 [[noreturn]] static void reportCmdLineError(const Twine &Message) {
348   WithColor::error(errs(), ToolName) << Message << "\n";
349   exit(1);
350 }
351 
warnOnNoMatchForSections()352 static void warnOnNoMatchForSections() {
353   SetVector<StringRef> MissingSections;
354   for (StringRef S : FilterSections) {
355     if (FoundSectionSet.count(S))
356       return;
357     // User may specify a unnamed section. Don't warn for it.
358     if (!S.empty())
359       MissingSections.insert(S);
360   }
361 
362   // Warn only if no section in FilterSections is matched.
363   for (StringRef S : MissingSections)
364     reportCmdLineWarning("section '" + S +
365                          "' mentioned in a -j/--section option, but not "
366                          "found in any input file");
367 }
368 
getTarget(const ObjectFile * Obj)369 static const Target *getTarget(const ObjectFile *Obj) {
370   // Figure out the target triple.
371   Triple TheTriple("unknown-unknown-unknown");
372   if (TripleName.empty()) {
373     TheTriple = Obj->makeTriple();
374   } else {
375     TheTriple.setTriple(Triple::normalize(TripleName));
376     auto Arch = Obj->getArch();
377     if (Arch == Triple::arm || Arch == Triple::armeb)
378       Obj->setARMSubArch(TheTriple);
379   }
380 
381   // Get the target specific parser.
382   std::string Error;
383   const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
384                                                          Error);
385   if (!TheTarget)
386     reportError(Obj->getFileName(), "can't find target: " + Error);
387 
388   // Update the triple name and return the found target.
389   TripleName = TheTriple.getTriple();
390   return TheTarget;
391 }
392 
isRelocAddressLess(RelocationRef A,RelocationRef B)393 bool objdump::isRelocAddressLess(RelocationRef A, RelocationRef B) {
394   return A.getOffset() < B.getOffset();
395 }
396 
getRelocationValueString(const RelocationRef & Rel,SmallVectorImpl<char> & Result)397 static Error getRelocationValueString(const RelocationRef &Rel,
398                                       SmallVectorImpl<char> &Result) {
399   const ObjectFile *Obj = Rel.getObject();
400   if (auto *ELF = dyn_cast<ELFObjectFileBase>(Obj))
401     return getELFRelocationValueString(ELF, Rel, Result);
402   if (auto *COFF = dyn_cast<COFFObjectFile>(Obj))
403     return getCOFFRelocationValueString(COFF, Rel, Result);
404   if (auto *Wasm = dyn_cast<WasmObjectFile>(Obj))
405     return getWasmRelocationValueString(Wasm, Rel, Result);
406   if (auto *MachO = dyn_cast<MachOObjectFile>(Obj))
407     return getMachORelocationValueString(MachO, Rel, Result);
408   if (auto *XCOFF = dyn_cast<XCOFFObjectFile>(Obj))
409     return getXCOFFRelocationValueString(*XCOFF, Rel, Result);
410   llvm_unreachable("unknown object file format");
411 }
412 
413 /// Indicates whether this relocation should hidden when listing
414 /// relocations, usually because it is the trailing part of a multipart
415 /// relocation that will be printed as part of the leading relocation.
getHidden(RelocationRef RelRef)416 static bool getHidden(RelocationRef RelRef) {
417   auto *MachO = dyn_cast<MachOObjectFile>(RelRef.getObject());
418   if (!MachO)
419     return false;
420 
421   unsigned Arch = MachO->getArch();
422   DataRefImpl Rel = RelRef.getRawDataRefImpl();
423   uint64_t Type = MachO->getRelocationType(Rel);
424 
425   // On arches that use the generic relocations, GENERIC_RELOC_PAIR
426   // is always hidden.
427   if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc)
428     return Type == MachO::GENERIC_RELOC_PAIR;
429 
430   if (Arch == Triple::x86_64) {
431     // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
432     // an X86_64_RELOC_SUBTRACTOR.
433     if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) {
434       DataRefImpl RelPrev = Rel;
435       RelPrev.d.a--;
436       uint64_t PrevType = MachO->getRelocationType(RelPrev);
437       if (PrevType == MachO::X86_64_RELOC_SUBTRACTOR)
438         return true;
439     }
440   }
441 
442   return false;
443 }
444 
445 namespace {
446 
447 /// Get the column at which we want to start printing the instruction
448 /// disassembly, taking into account anything which appears to the left of it.
getInstStartColumn(const MCSubtargetInfo & STI)449 unsigned getInstStartColumn(const MCSubtargetInfo &STI) {
450   return !ShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 40 : 24;
451 }
452 
isAArch64Elf(const ObjectFile & Obj)453 static bool isAArch64Elf(const ObjectFile &Obj) {
454   const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj);
455   return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
456 }
457 
isArmElf(const ObjectFile & Obj)458 static bool isArmElf(const ObjectFile &Obj) {
459   const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj);
460   return Elf && Elf->getEMachine() == ELF::EM_ARM;
461 }
462 
isCSKYElf(const ObjectFile & Obj)463 static bool isCSKYElf(const ObjectFile &Obj) {
464   const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj);
465   return Elf && Elf->getEMachine() == ELF::EM_CSKY;
466 }
467 
hasMappingSymbols(const ObjectFile & Obj)468 static bool hasMappingSymbols(const ObjectFile &Obj) {
469   return isArmElf(Obj) || isAArch64Elf(Obj) || isCSKYElf(Obj) ;
470 }
471 
isMappingSymbol(const SymbolInfoTy & Sym)472 static bool isMappingSymbol(const SymbolInfoTy &Sym) {
473   return Sym.Name.startswith("$d") || Sym.Name.startswith("$x") ||
474          Sym.Name.startswith("$a") || Sym.Name.startswith("$t");
475 }
476 
printRelocation(formatted_raw_ostream & OS,StringRef FileName,const RelocationRef & Rel,uint64_t Address,bool Is64Bits)477 static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
478                             const RelocationRef &Rel, uint64_t Address,
479                             bool Is64Bits) {
480   StringRef Fmt = Is64Bits ? "%016" PRIx64 ":  " : "%08" PRIx64 ":  ";
481   SmallString<16> Name;
482   SmallString<32> Val;
483   Rel.getTypeName(Name);
484   if (Error E = getRelocationValueString(Rel, Val))
485     reportError(std::move(E), FileName);
486   OS << (Is64Bits || !LeadingAddr ? "\t\t" : "\t\t\t");
487   if (LeadingAddr)
488     OS << format(Fmt.data(), Address);
489   OS << Name << "\t" << Val;
490 }
491 
AlignToInstStartColumn(size_t Start,const MCSubtargetInfo & STI,raw_ostream & OS)492 static void AlignToInstStartColumn(size_t Start, const MCSubtargetInfo &STI,
493                                    raw_ostream &OS) {
494   // The output of printInst starts with a tab. Print some spaces so that
495   // the tab has 1 column and advances to the target tab stop.
496   unsigned TabStop = getInstStartColumn(STI);
497   unsigned Column = OS.tell() - Start;
498   OS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
499 }
500 
501 class PrettyPrinter {
502 public:
503   virtual ~PrettyPrinter() = default;
504   virtual void
printInst(MCInstPrinter & IP,const MCInst * MI,ArrayRef<uint8_t> Bytes,object::SectionedAddress Address,formatted_raw_ostream & OS,StringRef Annot,MCSubtargetInfo const & STI,SourcePrinter * SP,StringRef ObjectFilename,std::vector<RelocationRef> * Rels,LiveVariablePrinter & LVP)505   printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
506             object::SectionedAddress Address, formatted_raw_ostream &OS,
507             StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
508             StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
509             LiveVariablePrinter &LVP) {
510     if (SP && (PrintSource || PrintLines))
511       SP->printSourceLine(OS, Address, ObjectFilename, LVP);
512     LVP.printBetweenInsts(OS, false);
513 
514     size_t Start = OS.tell();
515     if (LeadingAddr)
516       OS << format("%8" PRIx64 ":", Address.Address);
517     if (ShowRawInsn) {
518       OS << ' ';
519       dumpBytes(Bytes, OS);
520     }
521 
522     AlignToInstStartColumn(Start, STI, OS);
523 
524     if (MI) {
525       // See MCInstPrinter::printInst. On targets where a PC relative immediate
526       // is relative to the next instruction and the length of a MCInst is
527       // difficult to measure (x86), this is the address of the next
528       // instruction.
529       uint64_t Addr =
530           Address.Address + (STI.getTargetTriple().isX86() ? Bytes.size() : 0);
531       IP.printInst(MI, Addr, "", STI, OS);
532     } else
533       OS << "\t<unknown>";
534   }
535 };
536 PrettyPrinter PrettyPrinterInst;
537 
538 class HexagonPrettyPrinter : public PrettyPrinter {
539 public:
printLead(ArrayRef<uint8_t> Bytes,uint64_t Address,formatted_raw_ostream & OS)540   void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
541                  formatted_raw_ostream &OS) {
542     uint32_t opcode =
543       (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | Bytes[0];
544     if (LeadingAddr)
545       OS << format("%8" PRIx64 ":", Address);
546     if (ShowRawInsn) {
547       OS << "\t";
548       dumpBytes(Bytes.slice(0, 4), OS);
549       OS << format("\t%08" PRIx32, opcode);
550     }
551   }
printInst(MCInstPrinter & IP,const MCInst * MI,ArrayRef<uint8_t> Bytes,object::SectionedAddress Address,formatted_raw_ostream & OS,StringRef Annot,MCSubtargetInfo const & STI,SourcePrinter * SP,StringRef ObjectFilename,std::vector<RelocationRef> * Rels,LiveVariablePrinter & LVP)552   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
553                  object::SectionedAddress Address, formatted_raw_ostream &OS,
554                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
555                  StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
556                  LiveVariablePrinter &LVP) override {
557     if (SP && (PrintSource || PrintLines))
558       SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
559     if (!MI) {
560       printLead(Bytes, Address.Address, OS);
561       OS << " <unknown>";
562       return;
563     }
564     std::string Buffer;
565     {
566       raw_string_ostream TempStream(Buffer);
567       IP.printInst(MI, Address.Address, "", STI, TempStream);
568     }
569     StringRef Contents(Buffer);
570     // Split off bundle attributes
571     auto PacketBundle = Contents.rsplit('\n');
572     // Split off first instruction from the rest
573     auto HeadTail = PacketBundle.first.split('\n');
574     auto Preamble = " { ";
575     auto Separator = "";
576 
577     // Hexagon's packets require relocations to be inline rather than
578     // clustered at the end of the packet.
579     std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
580     std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
581     auto PrintReloc = [&]() -> void {
582       while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
583         if (RelCur->getOffset() == Address.Address) {
584           printRelocation(OS, ObjectFilename, *RelCur, Address.Address, false);
585           return;
586         }
587         ++RelCur;
588       }
589     };
590 
591     while (!HeadTail.first.empty()) {
592       OS << Separator;
593       Separator = "\n";
594       if (SP && (PrintSource || PrintLines))
595         SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
596       printLead(Bytes, Address.Address, OS);
597       OS << Preamble;
598       Preamble = "   ";
599       StringRef Inst;
600       auto Duplex = HeadTail.first.split('\v');
601       if (!Duplex.second.empty()) {
602         OS << Duplex.first;
603         OS << "; ";
604         Inst = Duplex.second;
605       }
606       else
607         Inst = HeadTail.first;
608       OS << Inst;
609       HeadTail = HeadTail.second.split('\n');
610       if (HeadTail.first.empty())
611         OS << " } " << PacketBundle.second;
612       PrintReloc();
613       Bytes = Bytes.slice(4);
614       Address.Address += 4;
615     }
616   }
617 };
618 HexagonPrettyPrinter HexagonPrettyPrinterInst;
619 
620 class AMDGCNPrettyPrinter : public PrettyPrinter {
621 public:
printInst(MCInstPrinter & IP,const MCInst * MI,ArrayRef<uint8_t> Bytes,object::SectionedAddress Address,formatted_raw_ostream & OS,StringRef Annot,MCSubtargetInfo const & STI,SourcePrinter * SP,StringRef ObjectFilename,std::vector<RelocationRef> * Rels,LiveVariablePrinter & LVP)622   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
623                  object::SectionedAddress Address, formatted_raw_ostream &OS,
624                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
625                  StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
626                  LiveVariablePrinter &LVP) override {
627     if (SP && (PrintSource || PrintLines))
628       SP->printSourceLine(OS, Address, ObjectFilename, LVP);
629 
630     if (MI) {
631       SmallString<40> InstStr;
632       raw_svector_ostream IS(InstStr);
633 
634       IP.printInst(MI, Address.Address, "", STI, IS);
635 
636       OS << left_justify(IS.str(), 60);
637     } else {
638       // an unrecognized encoding - this is probably data so represent it
639       // using the .long directive, or .byte directive if fewer than 4 bytes
640       // remaining
641       if (Bytes.size() >= 4) {
642         OS << format("\t.long 0x%08" PRIx32 " ",
643                      support::endian::read32<support::little>(Bytes.data()));
644         OS.indent(42);
645       } else {
646           OS << format("\t.byte 0x%02" PRIx8, Bytes[0]);
647           for (unsigned int i = 1; i < Bytes.size(); i++)
648             OS << format(", 0x%02" PRIx8, Bytes[i]);
649           OS.indent(55 - (6 * Bytes.size()));
650       }
651     }
652 
653     OS << format("// %012" PRIX64 ":", Address.Address);
654     if (Bytes.size() >= 4) {
655       // D should be casted to uint32_t here as it is passed by format to
656       // snprintf as vararg.
657       for (uint32_t D :
658            ArrayRef(reinterpret_cast<const support::little32_t *>(Bytes.data()),
659                     Bytes.size() / 4))
660           OS << format(" %08" PRIX32, D);
661     } else {
662       for (unsigned char B : Bytes)
663         OS << format(" %02" PRIX8, B);
664     }
665 
666     if (!Annot.empty())
667       OS << " // " << Annot;
668   }
669 };
670 AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
671 
672 class BPFPrettyPrinter : public PrettyPrinter {
673 public:
printInst(MCInstPrinter & IP,const MCInst * MI,ArrayRef<uint8_t> Bytes,object::SectionedAddress Address,formatted_raw_ostream & OS,StringRef Annot,MCSubtargetInfo const & STI,SourcePrinter * SP,StringRef ObjectFilename,std::vector<RelocationRef> * Rels,LiveVariablePrinter & LVP)674   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
675                  object::SectionedAddress Address, formatted_raw_ostream &OS,
676                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
677                  StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
678                  LiveVariablePrinter &LVP) override {
679     if (SP && (PrintSource || PrintLines))
680       SP->printSourceLine(OS, Address, ObjectFilename, LVP);
681     if (LeadingAddr)
682       OS << format("%8" PRId64 ":", Address.Address / 8);
683     if (ShowRawInsn) {
684       OS << "\t";
685       dumpBytes(Bytes, OS);
686     }
687     if (MI)
688       IP.printInst(MI, Address.Address, "", STI, OS);
689     else
690       OS << "\t<unknown>";
691   }
692 };
693 BPFPrettyPrinter BPFPrettyPrinterInst;
694 
695 class ARMPrettyPrinter : public PrettyPrinter {
696 public:
printInst(MCInstPrinter & IP,const MCInst * MI,ArrayRef<uint8_t> Bytes,object::SectionedAddress Address,formatted_raw_ostream & OS,StringRef Annot,MCSubtargetInfo const & STI,SourcePrinter * SP,StringRef ObjectFilename,std::vector<RelocationRef> * Rels,LiveVariablePrinter & LVP)697   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
698                  object::SectionedAddress Address, formatted_raw_ostream &OS,
699                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
700                  StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
701                  LiveVariablePrinter &LVP) override {
702     if (SP && (PrintSource || PrintLines))
703       SP->printSourceLine(OS, Address, ObjectFilename, LVP);
704     LVP.printBetweenInsts(OS, false);
705 
706     size_t Start = OS.tell();
707     if (LeadingAddr)
708       OS << format("%8" PRIx64 ":", Address.Address);
709     if (ShowRawInsn) {
710       size_t Pos = 0, End = Bytes.size();
711       if (STI.checkFeatures("+thumb-mode")) {
712         for (; Pos + 2 <= End; Pos += 2)
713           OS << ' '
714              << format_hex_no_prefix(
715                     llvm::support::endian::read<uint16_t>(
716                         Bytes.data() + Pos, InstructionEndianness),
717                     4);
718       } else {
719         for (; Pos + 4 <= End; Pos += 4)
720           OS << ' '
721              << format_hex_no_prefix(
722                     llvm::support::endian::read<uint32_t>(
723                         Bytes.data() + Pos, InstructionEndianness),
724                     8);
725       }
726       if (Pos < End) {
727         OS << ' ';
728         dumpBytes(Bytes.slice(Pos), OS);
729       }
730     }
731 
732     AlignToInstStartColumn(Start, STI, OS);
733 
734     if (MI) {
735       IP.printInst(MI, Address.Address, "", STI, OS);
736     } else
737       OS << "\t<unknown>";
738   }
739 
setInstructionEndianness(llvm::support::endianness Endianness)740   void setInstructionEndianness(llvm::support::endianness Endianness) {
741     InstructionEndianness = Endianness;
742   }
743 
744 private:
745   llvm::support::endianness InstructionEndianness = llvm::support::little;
746 };
747 ARMPrettyPrinter ARMPrettyPrinterInst;
748 
749 class AArch64PrettyPrinter : public PrettyPrinter {
750 public:
printInst(MCInstPrinter & IP,const MCInst * MI,ArrayRef<uint8_t> Bytes,object::SectionedAddress Address,formatted_raw_ostream & OS,StringRef Annot,MCSubtargetInfo const & STI,SourcePrinter * SP,StringRef ObjectFilename,std::vector<RelocationRef> * Rels,LiveVariablePrinter & LVP)751   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
752                  object::SectionedAddress Address, formatted_raw_ostream &OS,
753                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
754                  StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
755                  LiveVariablePrinter &LVP) override {
756     if (SP && (PrintSource || PrintLines))
757       SP->printSourceLine(OS, Address, ObjectFilename, LVP);
758     LVP.printBetweenInsts(OS, false);
759 
760     size_t Start = OS.tell();
761     if (LeadingAddr)
762       OS << format("%8" PRIx64 ":", Address.Address);
763     if (ShowRawInsn) {
764       size_t Pos = 0, End = Bytes.size();
765       for (; Pos + 4 <= End; Pos += 4)
766         OS << ' '
767            << format_hex_no_prefix(
768                   llvm::support::endian::read<uint32_t>(Bytes.data() + Pos,
769                                                         llvm::support::little),
770                   8);
771       if (Pos < End) {
772         OS << ' ';
773         dumpBytes(Bytes.slice(Pos), OS);
774       }
775     }
776 
777     AlignToInstStartColumn(Start, STI, OS);
778 
779     if (MI) {
780       IP.printInst(MI, Address.Address, "", STI, OS);
781     } else
782       OS << "\t<unknown>";
783   }
784 };
785 AArch64PrettyPrinter AArch64PrettyPrinterInst;
786 
selectPrettyPrinter(Triple const & Triple)787 PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
788   switch(Triple.getArch()) {
789   default:
790     return PrettyPrinterInst;
791   case Triple::hexagon:
792     return HexagonPrettyPrinterInst;
793   case Triple::amdgcn:
794     return AMDGCNPrettyPrinterInst;
795   case Triple::bpfel:
796   case Triple::bpfeb:
797     return BPFPrettyPrinterInst;
798   case Triple::arm:
799   case Triple::armeb:
800   case Triple::thumb:
801   case Triple::thumbeb:
802     return ARMPrettyPrinterInst;
803   case Triple::aarch64:
804   case Triple::aarch64_be:
805   case Triple::aarch64_32:
806     return AArch64PrettyPrinterInst;
807   }
808 }
809 }
810 
getElfSymbolType(const ObjectFile & Obj,const SymbolRef & Sym)811 static uint8_t getElfSymbolType(const ObjectFile &Obj, const SymbolRef &Sym) {
812   assert(Obj.isELF());
813   if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj))
814     return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()),
815                          Obj.getFileName())
816         ->getType();
817   if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj))
818     return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()),
819                          Obj.getFileName())
820         ->getType();
821   if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj))
822     return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()),
823                          Obj.getFileName())
824         ->getType();
825   if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj))
826     return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()),
827                          Obj.getFileName())
828         ->getType();
829   llvm_unreachable("Unsupported binary format");
830 }
831 
832 template <class ELFT>
833 static void
addDynamicElfSymbols(const ELFObjectFile<ELFT> & Obj,std::map<SectionRef,SectionSymbolsTy> & AllSymbols)834 addDynamicElfSymbols(const ELFObjectFile<ELFT> &Obj,
835                      std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
836   for (auto Symbol : Obj.getDynamicSymbolIterators()) {
837     uint8_t SymbolType = Symbol.getELFType();
838     if (SymbolType == ELF::STT_SECTION)
839       continue;
840 
841     uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj.getFileName());
842     // ELFSymbolRef::getAddress() returns size instead of value for common
843     // symbols which is not desirable for disassembly output. Overriding.
844     if (SymbolType == ELF::STT_COMMON)
845       Address = unwrapOrError(Obj.getSymbol(Symbol.getRawDataRefImpl()),
846                               Obj.getFileName())
847                     ->st_value;
848 
849     StringRef Name = unwrapOrError(Symbol.getName(), Obj.getFileName());
850     if (Name.empty())
851       continue;
852 
853     section_iterator SecI =
854         unwrapOrError(Symbol.getSection(), Obj.getFileName());
855     if (SecI == Obj.section_end())
856       continue;
857 
858     AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
859   }
860 }
861 
862 static void
addDynamicElfSymbols(const ELFObjectFileBase & Obj,std::map<SectionRef,SectionSymbolsTy> & AllSymbols)863 addDynamicElfSymbols(const ELFObjectFileBase &Obj,
864                      std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
865   if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj))
866     addDynamicElfSymbols(*Elf32LEObj, AllSymbols);
867   else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj))
868     addDynamicElfSymbols(*Elf64LEObj, AllSymbols);
869   else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj))
870     addDynamicElfSymbols(*Elf32BEObj, AllSymbols);
871   else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj))
872     addDynamicElfSymbols(*Elf64BEObj, AllSymbols);
873   else
874     llvm_unreachable("Unsupported binary format");
875 }
876 
getWasmCodeSection(const WasmObjectFile & Obj)877 static std::optional<SectionRef> getWasmCodeSection(const WasmObjectFile &Obj) {
878   for (auto SecI : Obj.sections()) {
879     const WasmSection &Section = Obj.getWasmSection(SecI);
880     if (Section.Type == wasm::WASM_SEC_CODE)
881       return SecI;
882   }
883   return std::nullopt;
884 }
885 
886 static void
addMissingWasmCodeSymbols(const WasmObjectFile & Obj,std::map<SectionRef,SectionSymbolsTy> & AllSymbols)887 addMissingWasmCodeSymbols(const WasmObjectFile &Obj,
888                           std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
889   std::optional<SectionRef> Section = getWasmCodeSection(Obj);
890   if (!Section)
891     return;
892   SectionSymbolsTy &Symbols = AllSymbols[*Section];
893 
894   std::set<uint64_t> SymbolAddresses;
895   for (const auto &Sym : Symbols)
896     SymbolAddresses.insert(Sym.Addr);
897 
898   for (const wasm::WasmFunction &Function : Obj.functions()) {
899     uint64_t Address = Function.CodeSectionOffset;
900     // Only add fallback symbols for functions not already present in the symbol
901     // table.
902     if (SymbolAddresses.count(Address))
903       continue;
904     // This function has no symbol, so it should have no SymbolName.
905     assert(Function.SymbolName.empty());
906     // We use DebugName for the name, though it may be empty if there is no
907     // "name" custom section, or that section is missing a name for this
908     // function.
909     StringRef Name = Function.DebugName;
910     Symbols.emplace_back(Address, Name, ELF::STT_NOTYPE);
911   }
912 }
913 
addPltEntries(const ObjectFile & Obj,std::map<SectionRef,SectionSymbolsTy> & AllSymbols,StringSaver & Saver)914 static void addPltEntries(const ObjectFile &Obj,
915                           std::map<SectionRef, SectionSymbolsTy> &AllSymbols,
916                           StringSaver &Saver) {
917   std::optional<SectionRef> Plt;
918   for (const SectionRef &Section : Obj.sections()) {
919     Expected<StringRef> SecNameOrErr = Section.getName();
920     if (!SecNameOrErr) {
921       consumeError(SecNameOrErr.takeError());
922       continue;
923     }
924     if (*SecNameOrErr == ".plt")
925       Plt = Section;
926   }
927   if (!Plt)
928     return;
929   if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(&Obj)) {
930     for (auto PltEntry : ElfObj->getPltAddresses()) {
931       if (PltEntry.first) {
932         SymbolRef Symbol(*PltEntry.first, ElfObj);
933         uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
934         if (Expected<StringRef> NameOrErr = Symbol.getName()) {
935           if (!NameOrErr->empty())
936             AllSymbols[*Plt].emplace_back(
937                 PltEntry.second, Saver.save((*NameOrErr + "@plt").str()),
938                 SymbolType);
939           continue;
940         } else {
941           // The warning has been reported in disassembleObject().
942           consumeError(NameOrErr.takeError());
943         }
944       }
945       reportWarning("PLT entry at 0x" + Twine::utohexstr(PltEntry.second) +
946                         " references an invalid symbol",
947                     Obj.getFileName());
948     }
949   }
950 }
951 
952 // Normally the disassembly output will skip blocks of zeroes. This function
953 // returns the number of zero bytes that can be skipped when dumping the
954 // disassembly of the instructions in Buf.
countSkippableZeroBytes(ArrayRef<uint8_t> Buf)955 static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
956   // Find the number of leading zeroes.
957   size_t N = 0;
958   while (N < Buf.size() && !Buf[N])
959     ++N;
960 
961   // We may want to skip blocks of zero bytes, but unless we see
962   // at least 8 of them in a row.
963   if (N < 8)
964     return 0;
965 
966   // We skip zeroes in multiples of 4 because do not want to truncate an
967   // instruction if it starts with a zero byte.
968   return N & ~0x3;
969 }
970 
971 // Returns a map from sections to their relocations.
972 static std::map<SectionRef, std::vector<RelocationRef>>
getRelocsMap(object::ObjectFile const & Obj)973 getRelocsMap(object::ObjectFile const &Obj) {
974   std::map<SectionRef, std::vector<RelocationRef>> Ret;
975   uint64_t I = (uint64_t)-1;
976   for (SectionRef Sec : Obj.sections()) {
977     ++I;
978     Expected<section_iterator> RelocatedOrErr = Sec.getRelocatedSection();
979     if (!RelocatedOrErr)
980       reportError(Obj.getFileName(),
981                   "section (" + Twine(I) +
982                       "): failed to get a relocated section: " +
983                       toString(RelocatedOrErr.takeError()));
984 
985     section_iterator Relocated = *RelocatedOrErr;
986     if (Relocated == Obj.section_end() || !checkSectionFilter(*Relocated).Keep)
987       continue;
988     std::vector<RelocationRef> &V = Ret[*Relocated];
989     append_range(V, Sec.relocations());
990     // Sort relocations by address.
991     llvm::stable_sort(V, isRelocAddressLess);
992   }
993   return Ret;
994 }
995 
996 // Used for --adjust-vma to check if address should be adjusted by the
997 // specified value for a given section.
998 // For ELF we do not adjust non-allocatable sections like debug ones,
999 // because they are not loadable.
1000 // TODO: implement for other file formats.
shouldAdjustVA(const SectionRef & Section)1001 static bool shouldAdjustVA(const SectionRef &Section) {
1002   const ObjectFile *Obj = Section.getObject();
1003   if (Obj->isELF())
1004     return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC;
1005   return false;
1006 }
1007 
1008 
1009 typedef std::pair<uint64_t, char> MappingSymbolPair;
getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,uint64_t Address)1010 static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
1011                                  uint64_t Address) {
1012   auto It =
1013       partition_point(MappingSymbols, [Address](const MappingSymbolPair &Val) {
1014         return Val.first <= Address;
1015       });
1016   // Return zero for any address before the first mapping symbol; this means
1017   // we should use the default disassembly mode, depending on the target.
1018   if (It == MappingSymbols.begin())
1019     return '\x00';
1020   return (It - 1)->second;
1021 }
1022 
dumpARMELFData(uint64_t SectionAddr,uint64_t Index,uint64_t End,const ObjectFile & Obj,ArrayRef<uint8_t> Bytes,ArrayRef<MappingSymbolPair> MappingSymbols,const MCSubtargetInfo & STI,raw_ostream & OS)1023 static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index,
1024                                uint64_t End, const ObjectFile &Obj,
1025                                ArrayRef<uint8_t> Bytes,
1026                                ArrayRef<MappingSymbolPair> MappingSymbols,
1027                                const MCSubtargetInfo &STI, raw_ostream &OS) {
1028   support::endianness Endian =
1029       Obj.isLittleEndian() ? support::little : support::big;
1030   size_t Start = OS.tell();
1031   OS << format("%8" PRIx64 ": ", SectionAddr + Index);
1032   if (Index + 4 <= End) {
1033     dumpBytes(Bytes.slice(Index, 4), OS);
1034     AlignToInstStartColumn(Start, STI, OS);
1035     OS << "\t.word\t"
1036            << format_hex(support::endian::read32(Bytes.data() + Index, Endian),
1037                          10);
1038     return 4;
1039   }
1040   if (Index + 2 <= End) {
1041     dumpBytes(Bytes.slice(Index, 2), OS);
1042     AlignToInstStartColumn(Start, STI, OS);
1043     OS << "\t.short\t"
1044        << format_hex(support::endian::read16(Bytes.data() + Index, Endian), 6);
1045     return 2;
1046   }
1047   dumpBytes(Bytes.slice(Index, 1), OS);
1048   AlignToInstStartColumn(Start, STI, OS);
1049   OS << "\t.byte\t" << format_hex(Bytes[Index], 4);
1050   return 1;
1051 }
1052 
dumpELFData(uint64_t SectionAddr,uint64_t Index,uint64_t End,ArrayRef<uint8_t> Bytes)1053 static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
1054                         ArrayRef<uint8_t> Bytes) {
1055   // print out data up to 8 bytes at a time in hex and ascii
1056   uint8_t AsciiData[9] = {'\0'};
1057   uint8_t Byte;
1058   int NumBytes = 0;
1059 
1060   for (; Index < End; ++Index) {
1061     if (NumBytes == 0)
1062       outs() << format("%8" PRIx64 ":", SectionAddr + Index);
1063     Byte = Bytes.slice(Index)[0];
1064     outs() << format(" %02x", Byte);
1065     AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
1066 
1067     uint8_t IndentOffset = 0;
1068     NumBytes++;
1069     if (Index == End - 1 || NumBytes > 8) {
1070       // Indent the space for less than 8 bytes data.
1071       // 2 spaces for byte and one for space between bytes
1072       IndentOffset = 3 * (8 - NumBytes);
1073       for (int Excess = NumBytes; Excess < 8; Excess++)
1074         AsciiData[Excess] = '\0';
1075       NumBytes = 8;
1076     }
1077     if (NumBytes == 8) {
1078       AsciiData[8] = '\0';
1079       outs() << std::string(IndentOffset, ' ') << "         ";
1080       outs() << reinterpret_cast<char *>(AsciiData);
1081       outs() << '\n';
1082       NumBytes = 0;
1083     }
1084   }
1085 }
1086 
createSymbolInfo(const ObjectFile & Obj,const SymbolRef & Symbol)1087 SymbolInfoTy objdump::createSymbolInfo(const ObjectFile &Obj,
1088                                        const SymbolRef &Symbol) {
1089   const StringRef FileName = Obj.getFileName();
1090   const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
1091   const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
1092 
1093   if (Obj.isXCOFF() && SymbolDescription) {
1094     const auto &XCOFFObj = cast<XCOFFObjectFile>(Obj);
1095     DataRefImpl SymbolDRI = Symbol.getRawDataRefImpl();
1096 
1097     const uint32_t SymbolIndex = XCOFFObj.getSymbolIndex(SymbolDRI.p);
1098     std::optional<XCOFF::StorageMappingClass> Smc =
1099         getXCOFFSymbolCsectSMC(XCOFFObj, Symbol);
1100     return SymbolInfoTy(Addr, Name, Smc, SymbolIndex,
1101                         isLabel(XCOFFObj, Symbol));
1102   } else if (Obj.isXCOFF()) {
1103     const SymbolRef::Type SymType = unwrapOrError(Symbol.getType(), FileName);
1104     return SymbolInfoTy(Addr, Name, SymType, true);
1105   } else
1106     return SymbolInfoTy(Addr, Name,
1107                         Obj.isELF() ? getElfSymbolType(Obj, Symbol)
1108                                     : (uint8_t)ELF::STT_NOTYPE);
1109 }
1110 
createDummySymbolInfo(const ObjectFile & Obj,const uint64_t Addr,StringRef & Name,uint8_t Type)1111 static SymbolInfoTy createDummySymbolInfo(const ObjectFile &Obj,
1112                                           const uint64_t Addr, StringRef &Name,
1113                                           uint8_t Type) {
1114   if (Obj.isXCOFF() && SymbolDescription)
1115     return SymbolInfoTy(Addr, Name, std::nullopt, std::nullopt, false);
1116   else
1117     return SymbolInfoTy(Addr, Name, Type);
1118 }
1119 
1120 static void
collectBBAddrMapLabels(const std::unordered_map<uint64_t,BBAddrMap> & AddrToBBAddrMap,uint64_t SectionAddr,uint64_t Start,uint64_t End,std::unordered_map<uint64_t,std::vector<std::string>> & Labels)1121 collectBBAddrMapLabels(const std::unordered_map<uint64_t, BBAddrMap> &AddrToBBAddrMap,
1122                        uint64_t SectionAddr, uint64_t Start, uint64_t End,
1123                        std::unordered_map<uint64_t, std::vector<std::string>> &Labels) {
1124   if (AddrToBBAddrMap.empty())
1125     return;
1126   Labels.clear();
1127   uint64_t StartAddress = SectionAddr + Start;
1128   uint64_t EndAddress = SectionAddr + End;
1129   auto Iter = AddrToBBAddrMap.find(StartAddress);
1130   if (Iter == AddrToBBAddrMap.end())
1131     return;
1132   for (unsigned I = 0, Size = Iter->second.BBEntries.size(); I < Size; ++I) {
1133     uint64_t BBAddress = Iter->second.BBEntries[I].Offset + Iter->second.Addr;
1134     if (BBAddress >= EndAddress)
1135       continue;
1136     Labels[BBAddress].push_back(("BB" + Twine(I)).str());
1137   }
1138 }
1139 
collectLocalBranchTargets(ArrayRef<uint8_t> Bytes,const MCInstrAnalysis * MIA,MCDisassembler * DisAsm,MCInstPrinter * IP,const MCSubtargetInfo * STI,uint64_t SectionAddr,uint64_t Start,uint64_t End,std::unordered_map<uint64_t,std::string> & Labels)1140 static void collectLocalBranchTargets(
1141     ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA, MCDisassembler *DisAsm,
1142     MCInstPrinter *IP, const MCSubtargetInfo *STI, uint64_t SectionAddr,
1143     uint64_t Start, uint64_t End, std::unordered_map<uint64_t, std::string> &Labels) {
1144   // So far only supports PowerPC and X86.
1145   if (!STI->getTargetTriple().isPPC() && !STI->getTargetTriple().isX86())
1146     return;
1147 
1148   Labels.clear();
1149   unsigned LabelCount = 0;
1150   Start += SectionAddr;
1151   End += SectionAddr;
1152   uint64_t Index = Start;
1153   while (Index < End) {
1154     // Disassemble a real instruction and record function-local branch labels.
1155     MCInst Inst;
1156     uint64_t Size;
1157     ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index - SectionAddr);
1158     bool Disassembled =
1159         DisAsm->getInstruction(Inst, Size, ThisBytes, Index, nulls());
1160     if (Size == 0)
1161       Size = std::min<uint64_t>(ThisBytes.size(),
1162                                 DisAsm->suggestBytesToSkip(ThisBytes, Index));
1163 
1164     if (Disassembled && MIA) {
1165       uint64_t Target;
1166       bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
1167       // On PowerPC, if the address of a branch is the same as the target, it
1168       // means that it's a function call. Do not mark the label for this case.
1169       if (TargetKnown && (Target >= Start && Target < End) &&
1170           !Labels.count(Target) &&
1171           !(STI->getTargetTriple().isPPC() && Target == Index))
1172         Labels[Target] = ("L" + Twine(LabelCount++)).str();
1173     }
1174     Index += Size;
1175   }
1176 }
1177 
1178 // Create an MCSymbolizer for the target and add it to the MCDisassembler.
1179 // This is currently only used on AMDGPU, and assumes the format of the
1180 // void * argument passed to AMDGPU's createMCSymbolizer.
addSymbolizer(MCContext & Ctx,const Target * Target,StringRef TripleName,MCDisassembler * DisAsm,uint64_t SectionAddr,ArrayRef<uint8_t> Bytes,SectionSymbolsTy & Symbols,std::vector<std::unique_ptr<std::string>> & SynthesizedLabelNames)1181 static void addSymbolizer(
1182     MCContext &Ctx, const Target *Target, StringRef TripleName,
1183     MCDisassembler *DisAsm, uint64_t SectionAddr, ArrayRef<uint8_t> Bytes,
1184     SectionSymbolsTy &Symbols,
1185     std::vector<std::unique_ptr<std::string>> &SynthesizedLabelNames) {
1186 
1187   std::unique_ptr<MCRelocationInfo> RelInfo(
1188       Target->createMCRelocationInfo(TripleName, Ctx));
1189   if (!RelInfo)
1190     return;
1191   std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer(
1192       TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
1193   MCSymbolizer *SymbolizerPtr = &*Symbolizer;
1194   DisAsm->setSymbolizer(std::move(Symbolizer));
1195 
1196   if (!SymbolizeOperands)
1197     return;
1198 
1199   // Synthesize labels referenced by branch instructions by
1200   // disassembling, discarding the output, and collecting the referenced
1201   // addresses from the symbolizer.
1202   for (size_t Index = 0; Index != Bytes.size();) {
1203     MCInst Inst;
1204     uint64_t Size;
1205     ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index);
1206     const uint64_t ThisAddr = SectionAddr + Index;
1207     DisAsm->getInstruction(Inst, Size, ThisBytes, ThisAddr, nulls());
1208     if (Size == 0)
1209       Size = std::min<uint64_t>(ThisBytes.size(),
1210                                 DisAsm->suggestBytesToSkip(ThisBytes, Index));
1211     Index += Size;
1212   }
1213   ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses();
1214   // Copy and sort to remove duplicates.
1215   std::vector<uint64_t> LabelAddrs;
1216   LabelAddrs.insert(LabelAddrs.end(), LabelAddrsRef.begin(),
1217                     LabelAddrsRef.end());
1218   llvm::sort(LabelAddrs);
1219   LabelAddrs.resize(std::unique(LabelAddrs.begin(), LabelAddrs.end()) -
1220                     LabelAddrs.begin());
1221   // Add the labels.
1222   for (unsigned LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) {
1223     auto Name = std::make_unique<std::string>();
1224     *Name = (Twine("L") + Twine(LabelNum)).str();
1225     SynthesizedLabelNames.push_back(std::move(Name));
1226     Symbols.push_back(SymbolInfoTy(
1227         LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE));
1228   }
1229   llvm::stable_sort(Symbols);
1230   // Recreate the symbolizer with the new symbols list.
1231   RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx));
1232   Symbolizer.reset(Target->createMCSymbolizer(
1233       TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
1234   DisAsm->setSymbolizer(std::move(Symbolizer));
1235 }
1236 
getSegmentName(const MachOObjectFile * MachO,const SectionRef & Section)1237 static StringRef getSegmentName(const MachOObjectFile *MachO,
1238                                 const SectionRef &Section) {
1239   if (MachO) {
1240     DataRefImpl DR = Section.getRawDataRefImpl();
1241     StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
1242     return SegmentName;
1243   }
1244   return "";
1245 }
1246 
emitPostInstructionInfo(formatted_raw_ostream & FOS,const MCAsmInfo & MAI,const MCSubtargetInfo & STI,StringRef Comments,LiveVariablePrinter & LVP)1247 static void emitPostInstructionInfo(formatted_raw_ostream &FOS,
1248                                     const MCAsmInfo &MAI,
1249                                     const MCSubtargetInfo &STI,
1250                                     StringRef Comments,
1251                                     LiveVariablePrinter &LVP) {
1252   do {
1253     if (!Comments.empty()) {
1254       // Emit a line of comments.
1255       StringRef Comment;
1256       std::tie(Comment, Comments) = Comments.split('\n');
1257       // MAI.getCommentColumn() assumes that instructions are printed at the
1258       // position of 8, while getInstStartColumn() returns the actual position.
1259       unsigned CommentColumn =
1260           MAI.getCommentColumn() - 8 + getInstStartColumn(STI);
1261       FOS.PadToColumn(CommentColumn);
1262       FOS << MAI.getCommentString() << ' ' << Comment;
1263     }
1264     LVP.printAfterInst(FOS);
1265     FOS << '\n';
1266   } while (!Comments.empty());
1267   FOS.flush();
1268 }
1269 
createFakeELFSections(ObjectFile & Obj)1270 static void createFakeELFSections(ObjectFile &Obj) {
1271   assert(Obj.isELF());
1272   if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj))
1273     Elf32LEObj->createFakeSections();
1274   else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj))
1275     Elf64LEObj->createFakeSections();
1276   else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj))
1277     Elf32BEObj->createFakeSections();
1278   else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj))
1279     Elf64BEObj->createFakeSections();
1280   else
1281     llvm_unreachable("Unsupported binary format");
1282 }
1283 
1284 // Tries to fetch a more complete version of the given object file using its
1285 // Build ID. Returns std::nullopt if nothing was found.
1286 static std::optional<OwningBinary<Binary>>
fetchBinaryByBuildID(const ObjectFile & Obj)1287 fetchBinaryByBuildID(const ObjectFile &Obj) {
1288   std::optional<object::BuildIDRef> BuildID = getBuildID(&Obj);
1289   if (!BuildID)
1290     return std::nullopt;
1291   std::optional<std::string> Path = BIDFetcher->fetch(*BuildID);
1292   if (!Path)
1293     return std::nullopt;
1294   Expected<OwningBinary<Binary>> DebugBinary = createBinary(*Path);
1295   if (!DebugBinary) {
1296     reportWarning(toString(DebugBinary.takeError()), *Path);
1297     return std::nullopt;
1298   }
1299   return std::move(*DebugBinary);
1300 }
1301 
disassembleObject(const Target * TheTarget,ObjectFile & Obj,const ObjectFile & DbgObj,MCContext & Ctx,MCDisassembler * PrimaryDisAsm,MCDisassembler * SecondaryDisAsm,const MCInstrAnalysis * MIA,MCInstPrinter * IP,const MCSubtargetInfo * PrimarySTI,const MCSubtargetInfo * SecondarySTI,PrettyPrinter & PIP,SourcePrinter & SP,bool InlineRelocs)1302 static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
1303                               const ObjectFile &DbgObj, MCContext &Ctx,
1304                               MCDisassembler *PrimaryDisAsm,
1305                               MCDisassembler *SecondaryDisAsm,
1306                               const MCInstrAnalysis *MIA, MCInstPrinter *IP,
1307                               const MCSubtargetInfo *PrimarySTI,
1308                               const MCSubtargetInfo *SecondarySTI,
1309                               PrettyPrinter &PIP, SourcePrinter &SP,
1310                               bool InlineRelocs) {
1311   const MCSubtargetInfo *STI = PrimarySTI;
1312   MCDisassembler *DisAsm = PrimaryDisAsm;
1313   bool PrimaryIsThumb = false;
1314   if (isArmElf(Obj))
1315     PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
1316 
1317   std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
1318   if (InlineRelocs)
1319     RelocMap = getRelocsMap(Obj);
1320   bool Is64Bits = Obj.getBytesInAddress() > 4;
1321 
1322   // Create a mapping from virtual address to symbol name.  This is used to
1323   // pretty print the symbols while disassembling.
1324   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
1325   SectionSymbolsTy AbsoluteSymbols;
1326   const StringRef FileName = Obj.getFileName();
1327   const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(&Obj);
1328   for (const SymbolRef &Symbol : Obj.symbols()) {
1329     Expected<StringRef> NameOrErr = Symbol.getName();
1330     if (!NameOrErr) {
1331       reportWarning(toString(NameOrErr.takeError()), FileName);
1332       continue;
1333     }
1334     if (NameOrErr->empty() && !(Obj.isXCOFF() && SymbolDescription))
1335       continue;
1336 
1337     if (Obj.isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION)
1338       continue;
1339 
1340     if (MachO) {
1341       // __mh_(execute|dylib|dylinker|bundle|preload|object)_header are special
1342       // symbols that support MachO header introspection. They do not bind to
1343       // code locations and are irrelevant for disassembly.
1344       if (NameOrErr->startswith("__mh_") && NameOrErr->endswith("_header"))
1345         continue;
1346       // Don't ask a Mach-O STAB symbol for its section unless you know that
1347       // STAB symbol's section field refers to a valid section index. Otherwise
1348       // the symbol may error trying to load a section that does not exist.
1349       DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
1350       uint8_t NType = (MachO->is64Bit() ?
1351                        MachO->getSymbol64TableEntry(SymDRI).n_type:
1352                        MachO->getSymbolTableEntry(SymDRI).n_type);
1353       if (NType & MachO::N_STAB)
1354         continue;
1355     }
1356 
1357     section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
1358     if (SecI != Obj.section_end())
1359       AllSymbols[*SecI].push_back(createSymbolInfo(Obj, Symbol));
1360     else
1361       AbsoluteSymbols.push_back(createSymbolInfo(Obj, Symbol));
1362   }
1363 
1364   if (AllSymbols.empty() && Obj.isELF())
1365     addDynamicElfSymbols(cast<ELFObjectFileBase>(Obj), AllSymbols);
1366 
1367   if (Obj.isWasm())
1368     addMissingWasmCodeSymbols(cast<WasmObjectFile>(Obj), AllSymbols);
1369 
1370   if (Obj.isELF() && Obj.sections().empty())
1371     createFakeELFSections(Obj);
1372 
1373   BumpPtrAllocator A;
1374   StringSaver Saver(A);
1375   addPltEntries(Obj, AllSymbols, Saver);
1376 
1377   // Create a mapping from virtual address to section. An empty section can
1378   // cause more than one section at the same address. Sort such sections to be
1379   // before same-addressed non-empty sections so that symbol lookups prefer the
1380   // non-empty section.
1381   std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
1382   for (SectionRef Sec : Obj.sections())
1383     SectionAddresses.emplace_back(Sec.getAddress(), Sec);
1384   llvm::stable_sort(SectionAddresses, [](const auto &LHS, const auto &RHS) {
1385     if (LHS.first != RHS.first)
1386       return LHS.first < RHS.first;
1387     return LHS.second.getSize() < RHS.second.getSize();
1388   });
1389 
1390   // Linked executables (.exe and .dll files) typically don't include a real
1391   // symbol table but they might contain an export table.
1392   if (const auto *COFFObj = dyn_cast<COFFObjectFile>(&Obj)) {
1393     for (const auto &ExportEntry : COFFObj->export_directories()) {
1394       StringRef Name;
1395       if (Error E = ExportEntry.getSymbolName(Name))
1396         reportError(std::move(E), Obj.getFileName());
1397       if (Name.empty())
1398         continue;
1399 
1400       uint32_t RVA;
1401       if (Error E = ExportEntry.getExportRVA(RVA))
1402         reportError(std::move(E), Obj.getFileName());
1403 
1404       uint64_t VA = COFFObj->getImageBase() + RVA;
1405       auto Sec = partition_point(
1406           SectionAddresses, [VA](const std::pair<uint64_t, SectionRef> &O) {
1407             return O.first <= VA;
1408           });
1409       if (Sec != SectionAddresses.begin()) {
1410         --Sec;
1411         AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE);
1412       } else
1413         AbsoluteSymbols.emplace_back(VA, Name, ELF::STT_NOTYPE);
1414     }
1415   }
1416 
1417   // Sort all the symbols, this allows us to use a simple binary search to find
1418   // Multiple symbols can have the same address. Use a stable sort to stabilize
1419   // the output.
1420   StringSet<> FoundDisasmSymbolSet;
1421   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
1422     llvm::stable_sort(SecSyms.second);
1423   llvm::stable_sort(AbsoluteSymbols);
1424 
1425   std::unique_ptr<DWARFContext> DICtx;
1426   LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI);
1427 
1428   if (DbgVariables != DVDisabled) {
1429     DICtx = DWARFContext::create(DbgObj);
1430     for (const std::unique_ptr<DWARFUnit> &CU : DICtx->compile_units())
1431       LVP.addCompileUnit(CU->getUnitDIE(false));
1432   }
1433 
1434   LLVM_DEBUG(LVP.dump());
1435 
1436   std::unordered_map<uint64_t, BBAddrMap> AddrToBBAddrMap;
1437   auto ReadBBAddrMap = [&](std::optional<unsigned> SectionIndex =
1438                                std::nullopt) {
1439     AddrToBBAddrMap.clear();
1440     if (const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj)) {
1441       auto BBAddrMapsOrErr = Elf->readBBAddrMap(SectionIndex);
1442       if (!BBAddrMapsOrErr)
1443         reportWarning(toString(BBAddrMapsOrErr.takeError()), Obj.getFileName());
1444       for (auto &FunctionBBAddrMap : *BBAddrMapsOrErr)
1445         AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr,
1446                                 std::move(FunctionBBAddrMap));
1447     }
1448   };
1449 
1450   // For non-relocatable objects, Read all LLVM_BB_ADDR_MAP sections into a
1451   // single mapping, since they don't have any conflicts.
1452   if (SymbolizeOperands && !Obj.isRelocatableObject())
1453     ReadBBAddrMap();
1454 
1455   for (const SectionRef &Section : ToolSectionFilter(Obj)) {
1456     if (FilterSections.empty() && !DisassembleAll &&
1457         (!Section.isText() || Section.isVirtual()))
1458       continue;
1459 
1460     uint64_t SectionAddr = Section.getAddress();
1461     uint64_t SectSize = Section.getSize();
1462     if (!SectSize)
1463       continue;
1464 
1465     // For relocatable object files, read the LLVM_BB_ADDR_MAP section
1466     // corresponding to this section, if present.
1467     if (SymbolizeOperands && Obj.isRelocatableObject())
1468       ReadBBAddrMap(Section.getIndex());
1469 
1470     // Get the list of all the symbols in this section.
1471     SectionSymbolsTy &Symbols = AllSymbols[Section];
1472     std::vector<MappingSymbolPair> MappingSymbols;
1473     if (hasMappingSymbols(Obj)) {
1474       for (const auto &Symb : Symbols) {
1475         uint64_t Address = Symb.Addr;
1476         StringRef Name = Symb.Name;
1477         if (Name.startswith("$d"))
1478           MappingSymbols.emplace_back(Address - SectionAddr, 'd');
1479         if (Name.startswith("$x"))
1480           MappingSymbols.emplace_back(Address - SectionAddr, 'x');
1481         if (Name.startswith("$a"))
1482           MappingSymbols.emplace_back(Address - SectionAddr, 'a');
1483         if (Name.startswith("$t"))
1484           MappingSymbols.emplace_back(Address - SectionAddr, 't');
1485       }
1486     }
1487 
1488     llvm::sort(MappingSymbols);
1489 
1490     ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
1491         unwrapOrError(Section.getContents(), Obj.getFileName()));
1492 
1493     std::vector<std::unique_ptr<std::string>> SynthesizedLabelNames;
1494     if (Obj.isELF() && Obj.getArch() == Triple::amdgcn) {
1495       // AMDGPU disassembler uses symbolizer for printing labels
1496       addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes,
1497                     Symbols, SynthesizedLabelNames);
1498     }
1499 
1500     StringRef SegmentName = getSegmentName(MachO, Section);
1501     StringRef SectionName = unwrapOrError(Section.getName(), Obj.getFileName());
1502     // If the section has no symbol at the start, just insert a dummy one.
1503     if (Symbols.empty() || Symbols[0].Addr != 0) {
1504       Symbols.insert(Symbols.begin(),
1505                      createDummySymbolInfo(Obj, SectionAddr, SectionName,
1506                                            Section.isText() ? ELF::STT_FUNC
1507                                                             : ELF::STT_OBJECT));
1508     }
1509 
1510     SmallString<40> Comments;
1511     raw_svector_ostream CommentStream(Comments);
1512 
1513     uint64_t VMAAdjustment = 0;
1514     if (shouldAdjustVA(Section))
1515       VMAAdjustment = AdjustVMA;
1516 
1517     // In executable and shared objects, r_offset holds a virtual address.
1518     // Subtract SectionAddr from the r_offset field of a relocation to get
1519     // the section offset.
1520     uint64_t RelAdjustment = Obj.isRelocatableObject() ? 0 : SectionAddr;
1521     uint64_t Size;
1522     uint64_t Index;
1523     bool PrintedSection = false;
1524     std::vector<RelocationRef> Rels = RelocMap[Section];
1525     std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
1526     std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
1527 
1528     // Loop over each chunk of code between two points where at least
1529     // one symbol is defined.
1530     for (size_t SI = 0, SE = Symbols.size(); SI != SE;) {
1531       // Advance SI past all the symbols starting at the same address,
1532       // and make an ArrayRef of them.
1533       unsigned FirstSI = SI;
1534       uint64_t Start = Symbols[SI].Addr;
1535       ArrayRef<SymbolInfoTy> SymbolsHere;
1536       while (SI != SE && Symbols[SI].Addr == Start)
1537         ++SI;
1538       SymbolsHere = ArrayRef<SymbolInfoTy>(&Symbols[FirstSI], SI - FirstSI);
1539 
1540       // Get the demangled names of all those symbols. We end up with a vector
1541       // of StringRef that holds the names we're going to use, and a vector of
1542       // std::string that stores the new strings returned by demangle(), if
1543       // any. If we don't call demangle() then that vector can stay empty.
1544       std::vector<StringRef> SymNamesHere;
1545       std::vector<std::string> DemangledSymNamesHere;
1546       if (Demangle) {
1547         // Fetch the demangled names and store them locally.
1548         for (const SymbolInfoTy &Symbol : SymbolsHere)
1549           DemangledSymNamesHere.push_back(demangle(Symbol.Name.str()));
1550         // Now we've finished modifying that vector, it's safe to make
1551         // a vector of StringRefs pointing into it.
1552         SymNamesHere.insert(SymNamesHere.begin(), DemangledSymNamesHere.begin(),
1553                             DemangledSymNamesHere.end());
1554       } else {
1555         for (const SymbolInfoTy &Symbol : SymbolsHere)
1556           SymNamesHere.push_back(Symbol.Name);
1557       }
1558 
1559       // Distinguish ELF data from code symbols, which will be used later on to
1560       // decide whether to 'disassemble' this chunk as a data declaration via
1561       // dumpELFData(), or whether to treat it as code.
1562       //
1563       // If data _and_ code symbols are defined at the same address, the code
1564       // takes priority, on the grounds that disassembling code is our main
1565       // purpose here, and it would be a worse failure to _not_ interpret
1566       // something that _was_ meaningful as code than vice versa.
1567       //
1568       // Any ELF symbol type that is not clearly data will be regarded as code.
1569       // In particular, one of the uses of STT_NOTYPE is for branch targets
1570       // inside functions, for which STT_FUNC would be inaccurate.
1571       //
1572       // So here, we spot whether there's any non-data symbol present at all,
1573       // and only set the DisassembleAsData flag if there isn't. Also, we use
1574       // this distinction to inform the decision of which symbol to print at
1575       // the head of the section, so that if we're printing code, we print a
1576       // code-related symbol name to go with it.
1577       bool DisassembleAsData = false;
1578       size_t DisplaySymIndex = SymbolsHere.size() - 1;
1579       if (Obj.isELF() && !DisassembleAll && Section.isText()) {
1580         DisassembleAsData = true; // unless we find a code symbol below
1581 
1582         for (size_t i = 0; i < SymbolsHere.size(); ++i) {
1583           uint8_t SymTy = SymbolsHere[i].Type;
1584           if (SymTy != ELF::STT_OBJECT && SymTy != ELF::STT_COMMON) {
1585             DisassembleAsData = false;
1586             DisplaySymIndex = i;
1587           }
1588         }
1589       }
1590 
1591       // Decide which symbol(s) from this collection we're going to print.
1592       std::vector<bool> SymsToPrint(SymbolsHere.size(), false);
1593       // If the user has given the --disassemble-symbols option, then we must
1594       // display every symbol in that set, and no others.
1595       if (!DisasmSymbolSet.empty()) {
1596         bool FoundAny = false;
1597         for (size_t i = 0; i < SymbolsHere.size(); ++i) {
1598           if (DisasmSymbolSet.count(SymNamesHere[i])) {
1599             SymsToPrint[i] = true;
1600             FoundAny = true;
1601           }
1602         }
1603 
1604         // And if none of the symbols here is one that the user asked for, skip
1605         // disassembling this entire chunk of code.
1606         if (!FoundAny)
1607           continue;
1608       } else {
1609         // Otherwise, print whichever symbol at this location is last in the
1610         // Symbols array, because that array is pre-sorted in a way intended to
1611         // correlate with priority of which symbol to display.
1612         SymsToPrint[DisplaySymIndex] = true;
1613       }
1614 
1615       // Now that we know we're disassembling this section, override the choice
1616       // of which symbols to display by printing _all_ of them at this address
1617       // if the user asked for all symbols.
1618       //
1619       // That way, '--show-all-symbols --disassemble-symbol=foo' will print
1620       // only the chunk of code headed by 'foo', but also show any other
1621       // symbols defined at that address, such as aliases for 'foo', or the ARM
1622       // mapping symbol preceding its code.
1623       if (ShowAllSymbols) {
1624         for (size_t i = 0; i < SymbolsHere.size(); ++i)
1625           SymsToPrint[i] = true;
1626       }
1627 
1628       if (Start < SectionAddr || StopAddress <= Start)
1629         continue;
1630 
1631       for (size_t i = 0; i < SymbolsHere.size(); ++i)
1632         FoundDisasmSymbolSet.insert(SymNamesHere[i]);
1633 
1634       // The end is the section end, the beginning of the next symbol, or
1635       // --stop-address.
1636       uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress);
1637       if (SI < SE)
1638         End = std::min(End, Symbols[SI].Addr);
1639       if (Start >= End || End <= StartAddress)
1640         continue;
1641       Start -= SectionAddr;
1642       End -= SectionAddr;
1643 
1644       if (!PrintedSection) {
1645         PrintedSection = true;
1646         outs() << "\nDisassembly of section ";
1647         if (!SegmentName.empty())
1648           outs() << SegmentName << ",";
1649         outs() << SectionName << ":\n";
1650       }
1651 
1652       outs() << '\n';
1653 
1654       for (size_t i = 0; i < SymbolsHere.size(); ++i) {
1655         if (!SymsToPrint[i])
1656           continue;
1657 
1658         const SymbolInfoTy &Symbol = SymbolsHere[i];
1659         const StringRef SymbolName = SymNamesHere[i];
1660 
1661         if (LeadingAddr)
1662           outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
1663                            SectionAddr + Start + VMAAdjustment);
1664         if (Obj.isXCOFF() && SymbolDescription) {
1665           outs() << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n";
1666         } else
1667           outs() << '<' << SymbolName << ">:\n";
1668       }
1669 
1670       // Don't print raw contents of a virtual section. A virtual section
1671       // doesn't have any contents in the file.
1672       if (Section.isVirtual()) {
1673         outs() << "...\n";
1674         continue;
1675       }
1676 
1677       // See if any of the symbols defined at this location triggers target-
1678       // specific disassembly behavior, e.g. of special descriptors or function
1679       // prelude information.
1680       //
1681       // We stop this loop at the first symbol that triggers some kind of
1682       // interesting behavior (if any), on the assumption that if two symbols
1683       // defined at the same address trigger two conflicting symbol handlers,
1684       // the object file is probably confused anyway, and it would make even
1685       // less sense to present the output of _both_ handlers, because that
1686       // would describe the same data twice.
1687       for (size_t SHI = 0; SHI < SymbolsHere.size(); ++SHI) {
1688         SymbolInfoTy Symbol = SymbolsHere[SHI];
1689 
1690         auto Status =
1691             DisAsm->onSymbolStart(Symbol, Size, Bytes.slice(Start, End - Start),
1692                                   SectionAddr + Start, CommentStream);
1693 
1694         if (!Status) {
1695           // If onSymbolStart returns std::nullopt, that means it didn't trigger
1696           // any interesting handling for this symbol. Try the other symbols
1697           // defined at this address.
1698           continue;
1699         }
1700 
1701         if (*Status == MCDisassembler::Fail) {
1702           // If onSymbolStart returns Fail, that means it identified some kind
1703           // of special data at this address, but wasn't able to disassemble it
1704           // meaningfully. So we fall back to disassembling the failed region
1705           // as bytes, assuming that the target detected the failure before
1706           // printing anything.
1707           //
1708           // Return values Success or SoftFail (i.e no 'real' failure) are
1709           // expected to mean that the target has emitted its own output.
1710           //
1711           // Either way, 'Size' will have been set to the amount of data
1712           // covered by whatever prologue the target identified. So we advance
1713           // our own position to beyond that. Sometimes that will be the entire
1714           // distance to the next symbol, and sometimes it will be just a
1715           // prologue and we should start disassembling instructions from where
1716           // it left off.
1717           outs() << "// Error in decoding " << SymNamesHere[SHI]
1718                  << " : Decoding failed region as bytes.\n";
1719           for (uint64_t I = 0; I < Size; ++I) {
1720             outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
1721                    << "\n";
1722           }
1723         }
1724         Start += Size;
1725         break;
1726       }
1727 
1728       Index = Start;
1729       if (SectionAddr < StartAddress)
1730         Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
1731 
1732       if (DisassembleAsData) {
1733         dumpELFData(SectionAddr, Index, End, Bytes);
1734         Index = End;
1735         continue;
1736       }
1737 
1738       bool DumpARMELFData = false;
1739       formatted_raw_ostream FOS(outs());
1740 
1741       std::unordered_map<uint64_t, std::string> AllLabels;
1742       std::unordered_map<uint64_t, std::vector<std::string>> BBAddrMapLabels;
1743       if (SymbolizeOperands) {
1744         collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI,
1745                                   SectionAddr, Index, End, AllLabels);
1746         collectBBAddrMapLabels(AddrToBBAddrMap, SectionAddr, Index, End,
1747                                BBAddrMapLabels);
1748       }
1749 
1750       while (Index < End) {
1751         // ARM and AArch64 ELF binaries can interleave data and text in the
1752         // same section. We rely on the markers introduced to understand what
1753         // we need to dump. If the data marker is within a function, it is
1754         // denoted as a word/short etc.
1755         if (!MappingSymbols.empty()) {
1756           char Kind = getMappingSymbolKind(MappingSymbols, Index);
1757           DumpARMELFData = Kind == 'd';
1758           if (SecondarySTI) {
1759             if (Kind == 'a') {
1760               STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
1761               DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
1762             } else if (Kind == 't') {
1763               STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
1764               DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
1765             }
1766           }
1767         }
1768 
1769         if (DumpARMELFData) {
1770           Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
1771                                 MappingSymbols, *STI, FOS);
1772         } else {
          // When -z or --disassemble-zeroes is given we always disassemble
          // zero bytes. Otherwise we might want to skip the zero bytes we see.
1775           if (!DisassembleZeroes) {
1776             uint64_t MaxOffset = End - Index;
1777             // For --reloc: print zero blocks patched by relocations, so that
1778             // relocations can be shown in the dump.
1779             if (RelCur != RelEnd)
1780               MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index,
1781                                    MaxOffset);
1782 
1783             if (size_t N =
1784                     countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
1785               FOS << "\t\t..." << '\n';
1786               Index += N;
1787               continue;
1788             }
1789           }
1790 
1791           // Print local label if there's any.
1792           auto Iter1 = BBAddrMapLabels.find(SectionAddr + Index);
1793           if (Iter1 != BBAddrMapLabels.end()) {
1794             for (StringRef Label : Iter1->second)
1795               FOS << "<" << Label << ">:\n";
1796           } else {
1797             auto Iter2 = AllLabels.find(SectionAddr + Index);
1798             if (Iter2 != AllLabels.end())
1799               FOS << "<" << Iter2->second << ">:\n";
1800           }
1801 
1802           // Disassemble a real instruction or a data when disassemble all is
1803           // provided
1804           MCInst Inst;
1805           ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index);
1806           uint64_t ThisAddr = SectionAddr + Index;
1807           bool Disassembled = DisAsm->getInstruction(Inst, Size, ThisBytes,
1808                                                      ThisAddr, CommentStream);
1809           if (Size == 0)
1810             Size = std::min<uint64_t>(
1811                 ThisBytes.size(),
1812                 DisAsm->suggestBytesToSkip(ThisBytes, ThisAddr));
1813 
1814           LVP.update({Index, Section.getIndex()},
1815                      {Index + Size, Section.getIndex()}, Index + Size != End);
1816 
1817           IP->setCommentStream(CommentStream);
1818 
1819           PIP.printInst(
1820               *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
1821               {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, FOS,
1822               "", *STI, &SP, Obj.getFileName(), &Rels, LVP);
1823 
1824           IP->setCommentStream(llvm::nulls());
1825 
1826           // If disassembly has failed, avoid analysing invalid/incomplete
1827           // instruction information. Otherwise, try to resolve the target
1828           // address (jump target or memory operand address) and print it on the
1829           // right of the instruction.
1830           if (Disassembled && MIA) {
1831             // Branch targets are printed just after the instructions.
1832             llvm::raw_ostream *TargetOS = &FOS;
1833             uint64_t Target;
1834             bool PrintTarget =
1835                 MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target);
1836             if (!PrintTarget)
1837               if (std::optional<uint64_t> MaybeTarget =
1838                       MIA->evaluateMemoryOperandAddress(
1839                           Inst, STI, SectionAddr + Index, Size)) {
1840                 Target = *MaybeTarget;
1841                 PrintTarget = true;
1842                 // Do not print real address when symbolizing.
1843                 if (!SymbolizeOperands) {
1844                   // Memory operand addresses are printed as comments.
1845                   TargetOS = &CommentStream;
1846                   *TargetOS << "0x" << Twine::utohexstr(Target);
1847                 }
1848               }
1849             if (PrintTarget) {
1850               // In a relocatable object, the target's section must reside in
1851               // the same section as the call instruction or it is accessed
1852               // through a relocation.
1853               //
1854               // In a non-relocatable object, the target may be in any section.
1855               // In that case, locate the section(s) containing the target
1856               // address and find the symbol in one of those, if possible.
1857               //
1858               // N.B. We don't walk the relocations in the relocatable case yet.
1859               std::vector<const SectionSymbolsTy *> TargetSectionSymbols;
1860               if (!Obj.isRelocatableObject()) {
1861                 auto It = llvm::partition_point(
1862                     SectionAddresses,
1863                     [=](const std::pair<uint64_t, SectionRef> &O) {
1864                       return O.first <= Target;
1865                     });
1866                 uint64_t TargetSecAddr = 0;
1867                 while (It != SectionAddresses.begin()) {
1868                   --It;
1869                   if (TargetSecAddr == 0)
1870                     TargetSecAddr = It->first;
1871                   if (It->first != TargetSecAddr)
1872                     break;
1873                   TargetSectionSymbols.push_back(&AllSymbols[It->second]);
1874                 }
1875               } else {
1876                 TargetSectionSymbols.push_back(&Symbols);
1877               }
1878               TargetSectionSymbols.push_back(&AbsoluteSymbols);
1879 
1880               // Find the last symbol in the first candidate section whose
1881               // offset is less than or equal to the target. If there are no
1882               // such symbols, try in the next section and so on, before finally
1883               // using the nearest preceding absolute symbol (if any), if there
1884               // are no other valid symbols.
1885               const SymbolInfoTy *TargetSym = nullptr;
1886               for (const SectionSymbolsTy *TargetSymbols :
1887                    TargetSectionSymbols) {
1888                 auto It = llvm::partition_point(
1889                     *TargetSymbols,
1890                     [=](const SymbolInfoTy &O) { return O.Addr <= Target; });
1891                 while (It != TargetSymbols->begin()) {
1892                   --It;
1893                   // Skip mapping symbols to avoid possible ambiguity as they
1894                   // do not allow uniquely identifying the target address.
1895                   if (!hasMappingSymbols(Obj) || !isMappingSymbol(*It)) {
1896                     TargetSym = &*It;
1897                     break;
1898                   }
1899                 }
1900                 if (TargetSym)
1901                   break;
1902               }
1903 
1904               // Print the labels corresponding to the target if there's any.
1905               bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target);
1906               bool LabelAvailable = AllLabels.count(Target);
1907               if (TargetSym != nullptr) {
1908                 uint64_t TargetAddress = TargetSym->Addr;
1909                 uint64_t Disp = Target - TargetAddress;
1910                 std::string TargetName = TargetSym->Name.str();
1911                 if (Demangle)
1912                   TargetName = demangle(TargetName);
1913 
1914                 *TargetOS << " <";
1915                 if (!Disp) {
1916                   // Always Print the binary symbol precisely corresponding to
1917                   // the target address.
1918                   *TargetOS << TargetName;
1919                 } else if (BBAddrMapLabelAvailable) {
1920                   *TargetOS << BBAddrMapLabels[Target].front();
1921                 } else if (LabelAvailable) {
1922                   *TargetOS << AllLabels[Target];
1923                 } else {
1924                   // Always Print the binary symbol plus an offset if there's no
1925                   // local label corresponding to the target address.
1926                   *TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp);
1927                 }
1928                 *TargetOS << ">";
1929               } else if (BBAddrMapLabelAvailable) {
1930                 *TargetOS << " <" << BBAddrMapLabels[Target].front() << ">";
1931               } else if (LabelAvailable) {
1932                 *TargetOS << " <" << AllLabels[Target] << ">";
1933               }
1934               // By convention, each record in the comment stream should be
1935               // terminated.
1936               if (TargetOS == &CommentStream)
1937                 *TargetOS << "\n";
1938             }
1939           }
1940         }
1941 
1942         assert(Ctx.getAsmInfo());
1943         emitPostInstructionInfo(FOS, *Ctx.getAsmInfo(), *STI,
1944                                 CommentStream.str(), LVP);
1945         Comments.clear();
1946 
1947         // Hexagon does this in pretty printer
1948         if (Obj.getArch() != Triple::hexagon) {
1949           // Print relocation for instruction and data.
1950           while (RelCur != RelEnd) {
1951             uint64_t Offset = RelCur->getOffset() - RelAdjustment;
1952             // If this relocation is hidden, skip it.
1953             if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
1954               ++RelCur;
1955               continue;
1956             }
1957 
1958             // Stop when RelCur's offset is past the disassembled
1959             // instruction/data. Note that it's possible the disassembled data
1960             // is not the complete data: we might see the relocation printed in
1961             // the middle of the data, but this matches the binutils objdump
1962             // output.
1963             if (Offset >= Index + Size)
1964               break;
1965 
1966             // When --adjust-vma is used, update the address printed.
1967             if (RelCur->getSymbol() != Obj.symbol_end()) {
1968               Expected<section_iterator> SymSI =
1969                   RelCur->getSymbol()->getSection();
1970               if (SymSI && *SymSI != Obj.section_end() &&
1971                   shouldAdjustVA(**SymSI))
1972                 Offset += AdjustVMA;
1973             }
1974 
1975             printRelocation(FOS, Obj.getFileName(), *RelCur,
1976                             SectionAddr + Offset, Is64Bits);
1977             LVP.printAfterOtherLine(FOS, true);
1978             ++RelCur;
1979           }
1980         }
1981 
1982         Index += Size;
1983       }
1984     }
1985   }
1986   StringSet<> MissingDisasmSymbolSet =
1987       set_difference(DisasmSymbolSet, FoundDisasmSymbolSet);
1988   for (StringRef Sym : MissingDisasmSymbolSet.keys())
1989     reportWarning("failed to disassemble missing symbol " + Sym, FileName);
1990 }
1991 
disassembleObject(ObjectFile * Obj,bool InlineRelocs)1992 static void disassembleObject(ObjectFile *Obj, bool InlineRelocs) {
1993   // If information useful for showing the disassembly is missing, try to find a
1994   // more complete binary and disassemble that instead.
1995   OwningBinary<Binary> FetchedBinary;
1996   if (Obj->symbols().empty()) {
1997     if (std::optional<OwningBinary<Binary>> FetchedBinaryOpt =
1998             fetchBinaryByBuildID(*Obj)) {
1999       if (auto *O = dyn_cast<ObjectFile>(FetchedBinaryOpt->getBinary())) {
2000         if (!O->symbols().empty() ||
2001             (!O->sections().empty() && Obj->sections().empty())) {
2002           FetchedBinary = std::move(*FetchedBinaryOpt);
2003           Obj = O;
2004         }
2005       }
2006     }
2007   }
2008 
2009   const Target *TheTarget = getTarget(Obj);
2010 
2011   // Package up features to be passed to target/subtarget
2012   Expected<SubtargetFeatures> FeaturesValue = Obj->getFeatures();
2013   if (!FeaturesValue)
2014     reportError(FeaturesValue.takeError(), Obj->getFileName());
2015   SubtargetFeatures Features = *FeaturesValue;
2016   if (!MAttrs.empty()) {
2017     for (unsigned I = 0; I != MAttrs.size(); ++I)
2018       Features.AddFeature(MAttrs[I]);
2019   } else if (MCPU.empty() && Obj->getArch() == llvm::Triple::aarch64) {
2020     Features.AddFeature("+all");
2021   }
2022 
2023   std::unique_ptr<const MCRegisterInfo> MRI(
2024       TheTarget->createMCRegInfo(TripleName));
2025   if (!MRI)
2026     reportError(Obj->getFileName(),
2027                 "no register info for target " + TripleName);
2028 
2029   // Set up disassembler.
2030   MCTargetOptions MCOptions;
2031   std::unique_ptr<const MCAsmInfo> AsmInfo(
2032       TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
2033   if (!AsmInfo)
2034     reportError(Obj->getFileName(),
2035                 "no assembly info for target " + TripleName);
2036 
2037   if (MCPU.empty())
2038     MCPU = Obj->tryGetCPUName().value_or("").str();
2039 
2040   if (isArmElf(*Obj)) {
2041     // When disassembling big-endian Arm ELF, the instruction endianness is
2042     // determined in a complex way. In relocatable objects, AAELF32 mandates
2043     // that instruction endianness matches the ELF file endianness; in
2044     // executable images, that's true unless the file header has the EF_ARM_BE8
2045     // flag, in which case instructions are little-endian regardless of data
2046     // endianness.
2047     //
2048     // We must set the big-endian-instructions SubtargetFeature to make the
2049     // disassembler read the instructions the right way round, and also tell
2050     // our own prettyprinter to retrieve the encodings the same way to print in
2051     // hex.
2052     const auto *Elf32BE = dyn_cast<ELF32BEObjectFile>(Obj);
2053 
2054     if (Elf32BE && (Elf32BE->isRelocatableObject() ||
2055                     !(Elf32BE->getPlatformFlags() & ELF::EF_ARM_BE8))) {
2056       Features.AddFeature("+big-endian-instructions");
2057       ARMPrettyPrinterInst.setInstructionEndianness(llvm::support::big);
2058     } else {
2059       ARMPrettyPrinterInst.setInstructionEndianness(llvm::support::little);
2060     }
2061   }
2062 
2063   std::unique_ptr<const MCSubtargetInfo> STI(
2064       TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
2065   if (!STI)
2066     reportError(Obj->getFileName(),
2067                 "no subtarget info for target " + TripleName);
2068   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
2069   if (!MII)
2070     reportError(Obj->getFileName(),
2071                 "no instruction info for target " + TripleName);
2072   MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
2073   // FIXME: for now initialize MCObjectFileInfo with default values
2074   std::unique_ptr<MCObjectFileInfo> MOFI(
2075       TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
2076   Ctx.setObjectFileInfo(MOFI.get());
2077 
2078   std::unique_ptr<MCDisassembler> DisAsm(
2079       TheTarget->createMCDisassembler(*STI, Ctx));
2080   if (!DisAsm)
2081     reportError(Obj->getFileName(), "no disassembler for target " + TripleName);
2082 
2083   // If we have an ARM object file, we need a second disassembler, because
2084   // ARM CPUs have two different instruction sets: ARM mode, and Thumb mode.
2085   // We use mapping symbols to switch between the two assemblers, where
2086   // appropriate.
2087   std::unique_ptr<MCDisassembler> SecondaryDisAsm;
2088   std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
2089   if (isArmElf(*Obj) && !STI->checkFeatures("+mclass")) {
2090     if (STI->checkFeatures("+thumb-mode"))
2091       Features.AddFeature("-thumb-mode");
2092     else
2093       Features.AddFeature("+thumb-mode");
2094     SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
2095                                                         Features.getString()));
2096     SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
2097   }
2098 
2099   std::unique_ptr<const MCInstrAnalysis> MIA(
2100       TheTarget->createMCInstrAnalysis(MII.get()));
2101 
2102   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
2103   std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
2104       Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
2105   if (!IP)
2106     reportError(Obj->getFileName(),
2107                 "no instruction printer for target " + TripleName);
2108   IP->setPrintImmHex(PrintImmHex);
2109   IP->setPrintBranchImmAsAddress(true);
2110   IP->setSymbolizeOperands(SymbolizeOperands);
2111   IP->setMCInstrAnalysis(MIA.get());
2112 
2113   PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
2114 
2115   const ObjectFile *DbgObj = Obj;
2116   if (!FetchedBinary.getBinary() && !Obj->hasDebugInfo()) {
2117     if (std::optional<OwningBinary<Binary>> DebugBinaryOpt =
2118             fetchBinaryByBuildID(*Obj)) {
2119       if (auto *FetchedObj =
2120               dyn_cast<const ObjectFile>(DebugBinaryOpt->getBinary())) {
2121         if (FetchedObj->hasDebugInfo()) {
2122           FetchedBinary = std::move(*DebugBinaryOpt);
2123           DbgObj = FetchedObj;
2124         }
2125       }
2126     }
2127   }
2128 
2129   std::unique_ptr<object::Binary> DSYMBinary;
2130   std::unique_ptr<MemoryBuffer> DSYMBuf;
2131   if (!DbgObj->hasDebugInfo()) {
2132     if (const MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&*Obj)) {
2133       DbgObj = objdump::getMachODSymObject(MachOOF, Obj->getFileName(),
2134                                            DSYMBinary, DSYMBuf);
2135       if (!DbgObj)
2136         return;
2137     }
2138   }
2139 
2140   SourcePrinter SP(DbgObj, TheTarget->getName());
2141 
2142   for (StringRef Opt : DisassemblerOptions)
2143     if (!IP->applyTargetSpecificCLOption(Opt))
2144       reportError(Obj->getFileName(),
2145                   "Unrecognized disassembler option: " + Opt);
2146 
2147   disassembleObject(TheTarget, *Obj, *DbgObj, Ctx, DisAsm.get(),
2148                     SecondaryDisAsm.get(), MIA.get(), IP.get(), STI.get(),
2149                     SecondarySTI.get(), PIP, SP, InlineRelocs);
2150 }
2151 
printRelocations(const ObjectFile * Obj)2152 void objdump::printRelocations(const ObjectFile *Obj) {
2153   StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
2154                                                  "%08" PRIx64;
2155 
2156   // Build a mapping from relocation target to a vector of relocation
2157   // sections. Usually, there is an only one relocation section for
2158   // each relocated section.
2159   MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
2160   uint64_t Ndx;
2161   for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) {
2162     if (Obj->isELF() && (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC))
2163       continue;
2164     if (Section.relocation_begin() == Section.relocation_end())
2165       continue;
2166     Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
2167     if (!SecOrErr)
2168       reportError(Obj->getFileName(),
2169                   "section (" + Twine(Ndx) +
2170                       "): unable to get a relocation target: " +
2171                       toString(SecOrErr.takeError()));
2172     SecToRelSec[**SecOrErr].push_back(Section);
2173   }
2174 
2175   for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) {
2176     StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName());
2177     outs() << "\nRELOCATION RECORDS FOR [" << SecName << "]:\n";
2178     uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
2179     uint32_t TypePadding = 24;
2180     outs() << left_justify("OFFSET", OffsetPadding) << " "
2181            << left_justify("TYPE", TypePadding) << " "
2182            << "VALUE\n";
2183 
2184     for (SectionRef Section : P.second) {
2185       for (const RelocationRef &Reloc : Section.relocations()) {
2186         uint64_t Address = Reloc.getOffset();
2187         SmallString<32> RelocName;
2188         SmallString<32> ValueStr;
2189         if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
2190           continue;
2191         Reloc.getTypeName(RelocName);
2192         if (Error E = getRelocationValueString(Reloc, ValueStr))
2193           reportError(std::move(E), Obj->getFileName());
2194 
2195         outs() << format(Fmt.data(), Address) << " "
2196                << left_justify(RelocName, TypePadding) << " " << ValueStr
2197                << "\n";
2198       }
2199     }
2200   }
2201 }
2202 
printDynamicRelocations(const ObjectFile * Obj)2203 void objdump::printDynamicRelocations(const ObjectFile *Obj) {
2204   // For the moment, this option is for ELF only
2205   if (!Obj->isELF())
2206     return;
2207 
2208   const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
2209   if (!Elf || !any_of(Elf->sections(), [](const ELFSectionRef Sec) {
2210         return Sec.getType() == ELF::SHT_DYNAMIC;
2211       })) {
2212     reportError(Obj->getFileName(), "not a dynamic object");
2213     return;
2214   }
2215 
2216   std::vector<SectionRef> DynRelSec = Obj->dynamic_relocation_sections();
2217   if (DynRelSec.empty())
2218     return;
2219 
2220   outs() << "\nDYNAMIC RELOCATION RECORDS\n";
2221   const uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
2222   const uint32_t TypePadding = 24;
2223   outs() << left_justify("OFFSET", OffsetPadding) << ' '
2224          << left_justify("TYPE", TypePadding) << " VALUE\n";
2225 
2226   StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
2227   for (const SectionRef &Section : DynRelSec)
2228     for (const RelocationRef &Reloc : Section.relocations()) {
2229       uint64_t Address = Reloc.getOffset();
2230       SmallString<32> RelocName;
2231       SmallString<32> ValueStr;
2232       Reloc.getTypeName(RelocName);
2233       if (Error E = getRelocationValueString(Reloc, ValueStr))
2234         reportError(std::move(E), Obj->getFileName());
2235       outs() << format(Fmt.data(), Address) << ' '
2236              << left_justify(RelocName, TypePadding) << ' ' << ValueStr << '\n';
2237     }
2238 }
2239 
2240 // Returns true if we need to show LMA column when dumping section headers. We
2241 // show it only when the platform is ELF and either we have at least one section
2242 // whose VMA and LMA are different and/or when --show-lma flag is used.
shouldDisplayLMA(const ObjectFile & Obj)2243 static bool shouldDisplayLMA(const ObjectFile &Obj) {
2244   if (!Obj.isELF())
2245     return false;
2246   for (const SectionRef &S : ToolSectionFilter(Obj))
2247     if (S.getAddress() != getELFSectionLMA(S))
2248       return true;
2249   return ShowLMA;
2250 }
2251 
getMaxSectionNameWidth(const ObjectFile & Obj)2252 static size_t getMaxSectionNameWidth(const ObjectFile &Obj) {
2253   // Default column width for names is 13 even if no names are that long.
2254   size_t MaxWidth = 13;
2255   for (const SectionRef &Section : ToolSectionFilter(Obj)) {
2256     StringRef Name = unwrapOrError(Section.getName(), Obj.getFileName());
2257     MaxWidth = std::max(MaxWidth, Name.size());
2258   }
2259   return MaxWidth;
2260 }
2261 
printSectionHeaders(ObjectFile & Obj)2262 void objdump::printSectionHeaders(ObjectFile &Obj) {
2263   if (Obj.isELF() && Obj.sections().empty())
2264     createFakeELFSections(Obj);
2265 
2266   size_t NameWidth = getMaxSectionNameWidth(Obj);
2267   size_t AddressWidth = 2 * Obj.getBytesInAddress();
2268   bool HasLMAColumn = shouldDisplayLMA(Obj);
2269   outs() << "\nSections:\n";
2270   if (HasLMAColumn)
2271     outs() << "Idx " << left_justify("Name", NameWidth) << " Size     "
2272            << left_justify("VMA", AddressWidth) << " "
2273            << left_justify("LMA", AddressWidth) << " Type\n";
2274   else
2275     outs() << "Idx " << left_justify("Name", NameWidth) << " Size     "
2276            << left_justify("VMA", AddressWidth) << " Type\n";
2277 
2278   uint64_t Idx;
2279   for (const SectionRef &Section : ToolSectionFilter(Obj, &Idx)) {
2280     StringRef Name = unwrapOrError(Section.getName(), Obj.getFileName());
2281     uint64_t VMA = Section.getAddress();
2282     if (shouldAdjustVA(Section))
2283       VMA += AdjustVMA;
2284 
2285     uint64_t Size = Section.getSize();
2286 
2287     std::string Type = Section.isText() ? "TEXT" : "";
2288     if (Section.isData())
2289       Type += Type.empty() ? "DATA" : ", DATA";
2290     if (Section.isBSS())
2291       Type += Type.empty() ? "BSS" : ", BSS";
2292     if (Section.isDebugSection())
2293       Type += Type.empty() ? "DEBUG" : ", DEBUG";
2294 
2295     if (HasLMAColumn)
2296       outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth,
2297                        Name.str().c_str(), Size)
2298              << format_hex_no_prefix(VMA, AddressWidth) << " "
2299              << format_hex_no_prefix(getELFSectionLMA(Section), AddressWidth)
2300              << " " << Type << "\n";
2301     else
2302       outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth,
2303                        Name.str().c_str(), Size)
2304              << format_hex_no_prefix(VMA, AddressWidth) << " " << Type << "\n";
2305   }
2306 }
2307 
printSectionContents(const ObjectFile * Obj)2308 void objdump::printSectionContents(const ObjectFile *Obj) {
2309   const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
2310 
2311   for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
2312     StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
2313     uint64_t BaseAddr = Section.getAddress();
2314     uint64_t Size = Section.getSize();
2315     if (!Size)
2316       continue;
2317 
2318     outs() << "Contents of section ";
2319     StringRef SegmentName = getSegmentName(MachO, Section);
2320     if (!SegmentName.empty())
2321       outs() << SegmentName << ",";
2322     outs() << Name << ":\n";
2323     if (Section.isBSS()) {
2324       outs() << format("<skipping contents of bss section at [%04" PRIx64
2325                        ", %04" PRIx64 ")>\n",
2326                        BaseAddr, BaseAddr + Size);
2327       continue;
2328     }
2329 
2330     StringRef Contents = unwrapOrError(Section.getContents(), Obj->getFileName());
2331 
2332     // Dump out the content as hex and printable ascii characters.
2333     for (std::size_t Addr = 0, End = Contents.size(); Addr < End; Addr += 16) {
2334       outs() << format(" %04" PRIx64 " ", BaseAddr + Addr);
2335       // Dump line of hex.
2336       for (std::size_t I = 0; I < 16; ++I) {
2337         if (I != 0 && I % 4 == 0)
2338           outs() << ' ';
2339         if (Addr + I < End)
2340           outs() << hexdigit((Contents[Addr + I] >> 4) & 0xF, true)
2341                  << hexdigit(Contents[Addr + I] & 0xF, true);
2342         else
2343           outs() << "  ";
2344       }
2345       // Print ascii.
2346       outs() << "  ";
2347       for (std::size_t I = 0; I < 16 && Addr + I < End; ++I) {
2348         if (isPrint(static_cast<unsigned char>(Contents[Addr + I]) & 0xFF))
2349           outs() << Contents[Addr + I];
2350         else
2351           outs() << ".";
2352       }
2353       outs() << "\n";
2354     }
2355   }
2356 }
2357 
printSymbolTable(const ObjectFile & O,StringRef ArchiveName,StringRef ArchitectureName,bool DumpDynamic)2358 void objdump::printSymbolTable(const ObjectFile &O, StringRef ArchiveName,
2359                                StringRef ArchitectureName, bool DumpDynamic) {
2360   if (O.isCOFF() && !DumpDynamic) {
2361     outs() << "\nSYMBOL TABLE:\n";
2362     printCOFFSymbolTable(cast<const COFFObjectFile>(O));
2363     return;
2364   }
2365 
2366   const StringRef FileName = O.getFileName();
2367 
2368   if (!DumpDynamic) {
2369     outs() << "\nSYMBOL TABLE:\n";
2370     for (auto I = O.symbol_begin(); I != O.symbol_end(); ++I)
2371       printSymbol(O, *I, {}, FileName, ArchiveName, ArchitectureName,
2372                   DumpDynamic);
2373     return;
2374   }
2375 
2376   outs() << "\nDYNAMIC SYMBOL TABLE:\n";
2377   if (!O.isELF()) {
2378     reportWarning(
2379         "this operation is not currently supported for this file format",
2380         FileName);
2381     return;
2382   }
2383 
2384   const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(&O);
2385   auto Symbols = ELF->getDynamicSymbolIterators();
2386   Expected<std::vector<VersionEntry>> SymbolVersionsOrErr =
2387       ELF->readDynsymVersions();
2388   if (!SymbolVersionsOrErr) {
2389     reportWarning(toString(SymbolVersionsOrErr.takeError()), FileName);
2390     SymbolVersionsOrErr = std::vector<VersionEntry>();
2391     (void)!SymbolVersionsOrErr;
2392   }
2393   for (auto &Sym : Symbols)
2394     printSymbol(O, Sym, *SymbolVersionsOrErr, FileName, ArchiveName,
2395                 ArchitectureName, DumpDynamic);
2396 }
2397 
/// Print one symbol-table line for \p Symbol of \p O.
///
/// The line consists of the symbol address, a column of single-character
/// flags, the containing section (or one of *ABS*, *COM*, *UND*, *DEBUG*), a
/// size or alignment field for common/XCOFF/ELF symbols, ELF version and
/// visibility information, and finally the symbol name.
///
/// Symbols whose address is below StartAddress or above StopAddress are
/// skipped without output.
///
/// \param SymbolVersions  version entries consulted for ELF symbols; may be
///                        empty, in which case no version is printed.
/// \param FileName, ArchiveName, ArchitectureName  used when reporting errors.
/// \param DumpDynamic     marks the symbol with 'D' in the debug column.
void objdump::printSymbol(const ObjectFile &O, const SymbolRef &Symbol,
                          ArrayRef<VersionEntry> SymbolVersions,
                          StringRef FileName, StringRef ArchiveName,
                          StringRef ArchitectureName, bool DumpDynamic) {
  const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(&O);
  uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName,
                                   ArchitectureName);
  if ((Address < StartAddress) || (Address > StopAddress))
    return;
  SymbolRef::Type Type =
      unwrapOrError(Symbol.getType(), FileName, ArchiveName, ArchitectureName);
  uint32_t Flags =
      unwrapOrError(Symbol.getFlags(), FileName, ArchiveName, ArchitectureName);

  // Don't ask a Mach-O STAB symbol for its section unless you know that
  // STAB symbol's section field refers to a valid section index. Otherwise
  // the symbol may error trying to load a section that does not exist.
  bool IsSTAB = false;
  if (MachO) {
    DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
    uint8_t NType =
        (MachO->is64Bit() ? MachO->getSymbol64TableEntry(SymDRI).n_type
                          : MachO->getSymbolTableEntry(SymDRI).n_type);
    if (NType & MachO::N_STAB)
      IsSTAB = true;
  }
  // STAB symbols are treated as having no section.
  section_iterator Section = IsSTAB
                                 ? O.section_end()
                                 : unwrapOrError(Symbol.getSection(), FileName,
                                                 ArchiveName, ArchitectureName);

  // A debug symbol that lives in a section is printed under the section's
  // name; a failure to read that name leaves Name empty. Every other symbol
  // uses its own name.
  StringRef Name;
  if (Type == SymbolRef::ST_Debug && Section != O.section_end()) {
    if (Expected<StringRef> NameOrErr = Section->getName())
      Name = *NameOrErr;
    else
      consumeError(NameOrErr.takeError());

  } else {
    Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName,
                         ArchitectureName);
  }

  bool Global = Flags & SymbolRef::SF_Global;
  bool Weak = Flags & SymbolRef::SF_Weak;
  bool Absolute = Flags & SymbolRef::SF_Absolute;
  bool Common = Flags & SymbolRef::SF_Common;
  bool Hidden = Flags & SymbolRef::SF_Hidden;

  // Binding column: 'g'/'l' for non-weak symbols that are absolute or have a
  // section; ELF STB_GNU_UNIQUE symbols override this with 'u' below.
  char GlobLoc = ' ';
  if ((Section != O.section_end() || Absolute) && !Weak)
    GlobLoc = Global ? 'g' : 'l';
  char IFunc = ' ';
  if (O.isELF()) {
    if (ELFSymbolRef(Symbol).getELFType() == ELF::STT_GNU_IFUNC)
      IFunc = 'i';
    if (ELFSymbolRef(Symbol).getBinding() == ELF::STB_GNU_UNIQUE)
      GlobLoc = 'u';
  }

  // In a dynamic dump every symbol is marked 'D', regardless of its type.
  char Debug = ' ';
  if (DumpDynamic)
    Debug = 'D';
  else if (Type == SymbolRef::ST_Debug || Type == SymbolRef::ST_File)
    Debug = 'd';

  char FileFunc = ' ';
  if (Type == SymbolRef::ST_File)
    FileFunc = 'f';
  else if (Type == SymbolRef::ST_Function)
    FileFunc = 'F';
  else if (Type == SymbolRef::ST_Data)
    FileFunc = 'O';

  // Addresses, sizes and alignments all share this width-dependent format.
  const char *Fmt = O.getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;

  outs() << format(Fmt, Address) << " "
         << GlobLoc            // Local -> 'l', Global -> 'g', Neither -> ' '
         << (Weak ? 'w' : ' ') // Weak?
         << ' '                // Constructor. Not supported yet.
         << ' '                // Warning. Not supported yet.
         << IFunc              // Indirect reference to another symbol.
         << Debug              // Debugging (d) or dynamic (D) symbol.
         << FileFunc           // Name of function (F), file (f) or object (O).
         << ' ';
  // Section column: a pseudo-section marker, or the real section name
  // (prefixed by the segment name when getSegmentName() returns one).
  if (Absolute) {
    outs() << "*ABS*";
  } else if (Common) {
    outs() << "*COM*";
  } else if (Section == O.section_end()) {
    if (O.isXCOFF()) {
      XCOFFSymbolRef XCOFFSym = cast<const XCOFFObjectFile>(O).toSymbolRef(
          Symbol.getRawDataRefImpl());
      if (XCOFF::N_DEBUG == XCOFFSym.getSectionNumber())
        outs() << "*DEBUG*";
      else
        outs() << "*UND*";
    } else
      outs() << "*UND*";
  } else {
    StringRef SegmentName = getSegmentName(MachO, *Section);
    if (!SegmentName.empty())
      outs() << SegmentName << ",";
    StringRef SectionName = unwrapOrError(Section->getName(), FileName);
    outs() << SectionName;
    // For XCOFF, additionally name the csect symbol that contains this
    // symbol, if getXCOFFSymbolContainingSymbolRef() finds one.
    if (O.isXCOFF()) {
      std::optional<SymbolRef> SymRef =
          getXCOFFSymbolContainingSymbolRef(cast<XCOFFObjectFile>(O), Symbol);
      if (SymRef) {

        Expected<StringRef> NameOrErr = SymRef->getName();

        if (NameOrErr) {
          outs() << " (csect:";
          std::string SymName(NameOrErr.get());

          if (Demangle)
            SymName = demangle(SymName);

          if (SymbolDescription)
            SymName = getXCOFFSymbolDescription(createSymbolInfo(O, *SymRef),
                                                SymName);

          outs() << ' ' << SymName;
          outs() << ") ";
        } else
          reportWarning(toString(NameOrErr.takeError()), FileName);
      }
    }
  }

  // Size column: alignment for common symbols, symbol size for XCOFF and
  // ELF; other formats print nothing here.
  if (Common)
    outs() << '\t' << format(Fmt, static_cast<uint64_t>(Symbol.getAlignment()));
  else if (O.isXCOFF())
    outs() << '\t'
           << format(Fmt, cast<XCOFFObjectFile>(O).getSymbolSize(
                              Symbol.getRawDataRefImpl()));
  else if (O.isELF())
    outs() << '\t' << format(Fmt, ELFSymbolRef(Symbol).getSize());

  if (O.isELF()) {
    if (!SymbolVersions.empty()) {
      // NOTE(review): the version entry is looked up at d.b - 1 of the raw
      // symbol reference; presumably d.b is the 1-based symbol index and the
      // list has one entry per symbol -- confirm, there is no bounds check.
      const VersionEntry &Ver =
          SymbolVersions[Symbol.getRawDataRefImpl().d.b - 1];
      std::string Str;
      if (!Ver.Name.empty())
        Str = Ver.IsVerDef ? ' ' + Ver.Name : '(' + Ver.Name + ')';
      outs() << ' ' << left_justify(Str, 12);
    }

    // Visibility, taken from the symbol's "other" byte; values that are not
    // one of the four STV_* constants are printed in hex.
    uint8_t Other = ELFSymbolRef(Symbol).getOther();
    switch (Other) {
    case ELF::STV_DEFAULT:
      break;
    case ELF::STV_INTERNAL:
      outs() << " .internal";
      break;
    case ELF::STV_HIDDEN:
      outs() << " .hidden";
      break;
    case ELF::STV_PROTECTED:
      outs() << " .protected";
      break;
    default:
      outs() << format(" 0x%02x", Other);
      break;
    }
  } else if (Hidden) {
    outs() << " .hidden";
  }

  // Finally the name, demangled and/or decorated with the XCOFF symbol
  // description when the corresponding options are enabled.
  std::string SymName(Name);
  if (Demangle)
    SymName = demangle(SymName);

  if (O.isXCOFF() && SymbolDescription)
    SymName = getXCOFFSymbolDescription(createSymbolInfo(O, Symbol), SymName);

  outs() << ' ' << SymName << '\n';
}
2578 
printUnwindInfo(const ObjectFile * O)2579 static void printUnwindInfo(const ObjectFile *O) {
2580   outs() << "Unwind info:\n\n";
2581 
2582   if (const COFFObjectFile *Coff = dyn_cast<COFFObjectFile>(O))
2583     printCOFFUnwindInfo(Coff);
2584   else if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(O))
2585     printMachOUnwindInfo(MachO);
2586   else
2587     // TODO: Extract DWARF dump tool to objdump.
2588     WithColor::error(errs(), ToolName)
2589         << "This operation is only currently supported "
2590            "for COFF and MachO object files.\n";
2591 }
2592 
2593 /// Dump the raw contents of the __clangast section so the output can be piped
2594 /// into llvm-bcanalyzer.
printRawClangAST(const ObjectFile * Obj)2595 static void printRawClangAST(const ObjectFile *Obj) {
2596   if (outs().is_displayed()) {
2597     WithColor::error(errs(), ToolName)
2598         << "The -raw-clang-ast option will dump the raw binary contents of "
2599            "the clang ast section.\n"
2600            "Please redirect the output to a file or another program such as "
2601            "llvm-bcanalyzer.\n";
2602     return;
2603   }
2604 
2605   StringRef ClangASTSectionName("__clangast");
2606   if (Obj->isCOFF()) {
2607     ClangASTSectionName = "clangast";
2608   }
2609 
2610   std::optional<object::SectionRef> ClangASTSection;
2611   for (auto Sec : ToolSectionFilter(*Obj)) {
2612     StringRef Name;
2613     if (Expected<StringRef> NameOrErr = Sec.getName())
2614       Name = *NameOrErr;
2615     else
2616       consumeError(NameOrErr.takeError());
2617 
2618     if (Name == ClangASTSectionName) {
2619       ClangASTSection = Sec;
2620       break;
2621     }
2622   }
2623   if (!ClangASTSection)
2624     return;
2625 
2626   StringRef ClangASTContents =
2627       unwrapOrError(ClangASTSection->getContents(), Obj->getFileName());
2628   outs().write(ClangASTContents.data(), ClangASTContents.size());
2629 }
2630 
printFaultMaps(const ObjectFile * Obj)2631 static void printFaultMaps(const ObjectFile *Obj) {
2632   StringRef FaultMapSectionName;
2633 
2634   if (Obj->isELF()) {
2635     FaultMapSectionName = ".llvm_faultmaps";
2636   } else if (Obj->isMachO()) {
2637     FaultMapSectionName = "__llvm_faultmaps";
2638   } else {
2639     WithColor::error(errs(), ToolName)
2640         << "This operation is only currently supported "
2641            "for ELF and Mach-O executable files.\n";
2642     return;
2643   }
2644 
2645   std::optional<object::SectionRef> FaultMapSection;
2646 
2647   for (auto Sec : ToolSectionFilter(*Obj)) {
2648     StringRef Name;
2649     if (Expected<StringRef> NameOrErr = Sec.getName())
2650       Name = *NameOrErr;
2651     else
2652       consumeError(NameOrErr.takeError());
2653 
2654     if (Name == FaultMapSectionName) {
2655       FaultMapSection = Sec;
2656       break;
2657     }
2658   }
2659 
2660   outs() << "FaultMap table:\n";
2661 
2662   if (!FaultMapSection) {
2663     outs() << "<not found>\n";
2664     return;
2665   }
2666 
2667   StringRef FaultMapContents =
2668       unwrapOrError(FaultMapSection->getContents(), Obj->getFileName());
2669   FaultMapParser FMP(FaultMapContents.bytes_begin(),
2670                      FaultMapContents.bytes_end());
2671 
2672   outs() << FMP;
2673 }
2674 
printPrivateFileHeaders(const ObjectFile * O,bool OnlyFirst)2675 static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
2676   if (O->isELF()) {
2677     printELFFileHeader(O);
2678     printELFDynamicSection(O);
2679     printELFSymbolVersionInfo(O);
2680     return;
2681   }
2682   if (O->isCOFF())
2683     return printCOFFFileHeader(cast<object::COFFObjectFile>(*O));
2684   if (O->isWasm())
2685     return printWasmFileHeader(O);
2686   if (O->isMachO()) {
2687     printMachOFileHeader(O);
2688     if (!OnlyFirst)
2689       printMachOLoadCommands(O);
2690     return;
2691   }
2692   reportError(O->getFileName(), "Invalid/Unsupported object file format");
2693 }
2694 
printFileHeaders(const ObjectFile * O)2695 static void printFileHeaders(const ObjectFile *O) {
2696   if (!O->isELF() && !O->isCOFF())
2697     reportError(O->getFileName(), "Invalid/Unsupported object file format");
2698 
2699   Triple::ArchType AT = O->getArch();
2700   outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n";
2701   uint64_t Address = unwrapOrError(O->getStartAddress(), O->getFileName());
2702 
2703   StringRef Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
2704   outs() << "start address: "
2705          << "0x" << format(Fmt.data(), Address) << "\n";
2706 }
2707 
printArchiveChild(StringRef Filename,const Archive::Child & C)2708 static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
2709   Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
2710   if (!ModeOrErr) {
2711     WithColor::error(errs(), ToolName) << "ill-formed archive entry.\n";
2712     consumeError(ModeOrErr.takeError());
2713     return;
2714   }
2715   sys::fs::perms Mode = ModeOrErr.get();
2716   outs() << ((Mode & sys::fs::owner_read) ? "r" : "-");
2717   outs() << ((Mode & sys::fs::owner_write) ? "w" : "-");
2718   outs() << ((Mode & sys::fs::owner_exe) ? "x" : "-");
2719   outs() << ((Mode & sys::fs::group_read) ? "r" : "-");
2720   outs() << ((Mode & sys::fs::group_write) ? "w" : "-");
2721   outs() << ((Mode & sys::fs::group_exe) ? "x" : "-");
2722   outs() << ((Mode & sys::fs::others_read) ? "r" : "-");
2723   outs() << ((Mode & sys::fs::others_write) ? "w" : "-");
2724   outs() << ((Mode & sys::fs::others_exe) ? "x" : "-");
2725 
2726   outs() << " ";
2727 
2728   outs() << format("%d/%d %6" PRId64 " ", unwrapOrError(C.getUID(), Filename),
2729                    unwrapOrError(C.getGID(), Filename),
2730                    unwrapOrError(C.getRawSize(), Filename));
2731 
2732   StringRef RawLastModified = C.getRawLastModified();
2733   unsigned Seconds;
2734   if (RawLastModified.getAsInteger(10, Seconds))
2735     outs() << "(date: \"" << RawLastModified
2736            << "\" contains non-decimal chars) ";
2737   else {
2738     // Since ctime(3) returns a 26 character string of the form:
2739     // "Sun Sep 16 01:03:52 1973\n\0"
2740     // just print 24 characters.
2741     time_t t = Seconds;
2742     outs() << format("%.24s ", ctime(&t));
2743   }
2744 
2745   StringRef Name = "";
2746   Expected<StringRef> NameOrErr = C.getName();
2747   if (!NameOrErr) {
2748     consumeError(NameOrErr.takeError());
2749     Name = unwrapOrError(C.getRawName(), Filename);
2750   } else {
2751     Name = NameOrErr.get();
2752   }
2753   outs() << Name << "\n";
2754 }
2755 
2756 // For ELF only now.
shouldWarnForInvalidStartStopAddress(ObjectFile * Obj)2757 static bool shouldWarnForInvalidStartStopAddress(ObjectFile *Obj) {
2758   if (const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj)) {
2759     if (Elf->getEType() != ELF::ET_REL)
2760       return true;
2761   }
2762   return false;
2763 }
2764 
checkForInvalidStartStopAddress(ObjectFile * Obj,uint64_t Start,uint64_t Stop)2765 static void checkForInvalidStartStopAddress(ObjectFile *Obj,
2766                                             uint64_t Start, uint64_t Stop) {
2767   if (!shouldWarnForInvalidStartStopAddress(Obj))
2768     return;
2769 
2770   for (const SectionRef &Section : Obj->sections())
2771     if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) {
2772       uint64_t BaseAddr = Section.getAddress();
2773       uint64_t Size = Section.getSize();
2774       if ((Start < BaseAddr + Size) && Stop > BaseAddr)
2775         return;
2776     }
2777 
2778   if (!HasStartAddressFlag)
2779     reportWarning("no section has address less than 0x" +
2780                       Twine::utohexstr(Stop) + " specified by --stop-address",
2781                   Obj->getFileName());
2782   else if (!HasStopAddressFlag)
2783     reportWarning("no section has address greater than or equal to 0x" +
2784                       Twine::utohexstr(Start) + " specified by --start-address",
2785                   Obj->getFileName());
2786   else
2787     reportWarning("no section overlaps the range [0x" +
2788                       Twine::utohexstr(Start) + ",0x" + Twine::utohexstr(Stop) +
2789                       ") specified by --start-address/--stop-address",
2790                   Obj->getFileName());
2791 }
2792 
/// Dump everything the command-line options request for the object file \p O.
///
/// \param A  the archive \p O was read from, or nullptr for a stand-alone
///           file; used for the banner and for archive-header output.
/// \param C  the archive member corresponding to \p O, if any; required for
///           printing the archive header.
///
/// Each option is handled independently, in the fixed order below.
static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
                       const Archive::Child *C = nullptr) {
  // Avoid other output when using a raw option.
  if (!RawClangAST) {
    outs() << '\n';
    if (A)
      outs() << A->getFileName() << "(" << O->getFileName() << ")";
    else
      outs() << O->getFileName();
    outs() << ":\tfile format " << O->getFileFormatName().lower() << "\n";
  }

  // Only validate the address range when the user actually supplied one.
  if (HasStartAddressFlag || HasStopAddressFlag)
    checkForInvalidStartStopAddress(O, StartAddress, StopAddress);

  // Note: the order here matches GNU objdump for compatibility.
  StringRef ArchiveName = A ? A->getFileName() : "";
  if (ArchiveHeaders && !MachOOpt && C)
    printArchiveChild(ArchiveName, *C);
  if (FileHeaders)
    printFileHeaders(O);
  if (PrivateHeaders || FirstPrivateHeader)
    printPrivateFileHeaders(O, FirstPrivateHeader);
  if (SectionHeaders)
    printSectionHeaders(*O);
  if (SymbolTable)
    printSymbolTable(*O, ArchiveName);
  if (DynamicSymbolTable)
    printSymbolTable(*O, ArchiveName, /*ArchitectureName=*/"",
                     /*DumpDynamic=*/true);
  if (DwarfDumpType != DIDT_Null) {
    std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
    // Dump the complete DWARF structure.
    DIDumpOptions DumpOpts;
    DumpOpts.DumpType = DwarfDumpType;
    DICtx->dump(outs(), DumpOpts);
  }
  // When disassembling, relocations are passed on to disassembleObject()
  // below instead of being printed as a separate table.
  if (Relocations && !Disassemble)
    printRelocations(O);
  if (DynamicRelocations)
    printDynamicRelocations(O);
  if (SectionContents)
    printSectionContents(O);
  if (Disassemble)
    disassembleObject(O, Relocations);
  if (UnwindInfo)
    printUnwindInfo(O);

  // Mach-O specific options:
  if (ExportsTrie)
    printExportsTrie(O);
  if (Rebase)
    printRebaseTable(O);
  if (Bind)
    printBindTable(O);
  if (LazyBind)
    printLazyBindTable(O);
  if (WeakBind)
    printWeakBindTable(O);

  // Other special sections:
  if (RawClangAST)
    printRawClangAST(O);
  if (FaultMapSection)
    printFaultMaps(O);
  if (Offloading)
    dumpOffloadBinary(*O);
}
2861 
dumpObject(const COFFImportFile * I,const Archive * A,const Archive::Child * C=nullptr)2862 static void dumpObject(const COFFImportFile *I, const Archive *A,
2863                        const Archive::Child *C = nullptr) {
2864   StringRef ArchiveName = A ? A->getFileName() : "";
2865 
2866   // Avoid other output when using a raw option.
2867   if (!RawClangAST)
2868     outs() << '\n'
2869            << ArchiveName << "(" << I->getFileName() << ")"
2870            << ":\tfile format COFF-import-file"
2871            << "\n\n";
2872 
2873   if (ArchiveHeaders && !MachOOpt && C)
2874     printArchiveChild(ArchiveName, *C);
2875   if (SymbolTable)
2876     printCOFFSymbolTable(*I);
2877 }
2878 
2879 /// Dump each object file in \a a;
dumpArchive(const Archive * A)2880 static void dumpArchive(const Archive *A) {
2881   Error Err = Error::success();
2882   unsigned I = -1;
2883   for (auto &C : A->children(Err)) {
2884     ++I;
2885     Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
2886     if (!ChildOrErr) {
2887       if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
2888         reportError(std::move(E), getFileNameForError(C, I), A->getFileName());
2889       continue;
2890     }
2891     if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
2892       dumpObject(O, A, &C);
2893     else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
2894       dumpObject(I, A, &C);
2895     else
2896       reportError(errorCodeToError(object_error::invalid_file_type),
2897                   A->getFileName());
2898   }
2899   if (Err)
2900     reportError(std::move(Err), A->getFileName());
2901 }
2902 
2903 /// Open file and figure out how to dump it.
dumpInput(StringRef file)2904 static void dumpInput(StringRef file) {
2905   // If we are using the Mach-O specific object file parser, then let it parse
2906   // the file and process the command line options.  So the -arch flags can
2907   // be used to select specific slices, etc.
2908   if (MachOOpt) {
2909     parseInputMachO(file);
2910     return;
2911   }
2912 
2913   // Attempt to open the binary.
2914   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(file), file);
2915   Binary &Binary = *OBinary.getBinary();
2916 
2917   if (Archive *A = dyn_cast<Archive>(&Binary))
2918     dumpArchive(A);
2919   else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary))
2920     dumpObject(O);
2921   else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
2922     parseInputMachO(UB);
2923   else if (OffloadBinary *OB = dyn_cast<OffloadBinary>(&Binary))
2924     dumpOffloadSections(*OB);
2925   else
2926     reportError(errorCodeToError(object_error::invalid_file_type), file);
2927 }
2928 
2929 template <typename T>
parseIntArg(const llvm::opt::InputArgList & InputArgs,int ID,T & Value)2930 static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID,
2931                         T &Value) {
2932   if (const opt::Arg *A = InputArgs.getLastArg(ID)) {
2933     StringRef V(A->getValue());
2934     if (!llvm::to_integer(V, Value, 0)) {
2935       reportCmdLineError(A->getSpelling() +
2936                          ": expected a non-negative integer, but got '" + V +
2937                          "'");
2938     }
2939   }
2940 }
2941 
parseBuildIDArg(const opt::Arg * A)2942 static object::BuildID parseBuildIDArg(const opt::Arg *A) {
2943   StringRef V(A->getValue());
2944   std::string Bytes;
2945   if (!tryGetFromHex(V, Bytes))
2946     reportCmdLineError(A->getSpelling() + ": expected a build ID, but got '" +
2947                        V + "'");
2948   ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
2949                             Bytes.size());
2950   return object::BuildID(BuildID.begin(), BuildID.end());
2951 }
2952 
invalidArgValue(const opt::Arg * A)2953 void objdump::invalidArgValue(const opt::Arg *A) {
2954   reportCmdLineError("'" + StringRef(A->getValue()) +
2955                      "' is not a valid value for '" + A->getSpelling() + "'");
2956 }
2957 
2958 static std::vector<std::string>
commaSeparatedValues(const llvm::opt::InputArgList & InputArgs,int ID)2959 commaSeparatedValues(const llvm::opt::InputArgList &InputArgs, int ID) {
2960   std::vector<std::string> Values;
2961   for (StringRef Value : InputArgs.getAllArgValues(ID)) {
2962     llvm::SmallVector<StringRef, 2> SplitValues;
2963     llvm::SplitString(Value, SplitValues, ",");
2964     for (StringRef SplitValue : SplitValues)
2965       Values.push_back(SplitValue.str());
2966   }
2967   return Values;
2968 }
2969 
parseOtoolOptions(const llvm::opt::InputArgList & InputArgs)2970 static void parseOtoolOptions(const llvm::opt::InputArgList &InputArgs) {
2971   MachOOpt = true;
2972   FullLeadingAddr = true;
2973   PrintImmHex = true;
2974 
2975   ArchName = InputArgs.getLastArgValue(OTOOL_arch).str();
2976   LinkOptHints = InputArgs.hasArg(OTOOL_C);
2977   if (InputArgs.hasArg(OTOOL_d))
2978     FilterSections.push_back("__DATA,__data");
2979   DylibId = InputArgs.hasArg(OTOOL_D);
2980   UniversalHeaders = InputArgs.hasArg(OTOOL_f);
2981   DataInCode = InputArgs.hasArg(OTOOL_G);
2982   FirstPrivateHeader = InputArgs.hasArg(OTOOL_h);
2983   IndirectSymbols = InputArgs.hasArg(OTOOL_I);
2984   ShowRawInsn = InputArgs.hasArg(OTOOL_j);
2985   PrivateHeaders = InputArgs.hasArg(OTOOL_l);
2986   DylibsUsed = InputArgs.hasArg(OTOOL_L);
2987   MCPU = InputArgs.getLastArgValue(OTOOL_mcpu_EQ).str();
2988   ObjcMetaData = InputArgs.hasArg(OTOOL_o);
2989   DisSymName = InputArgs.getLastArgValue(OTOOL_p).str();
2990   InfoPlist = InputArgs.hasArg(OTOOL_P);
2991   Relocations = InputArgs.hasArg(OTOOL_r);
2992   if (const Arg *A = InputArgs.getLastArg(OTOOL_s)) {
2993     auto Filter = (A->getValue(0) + StringRef(",") + A->getValue(1)).str();
2994     FilterSections.push_back(Filter);
2995   }
2996   if (InputArgs.hasArg(OTOOL_t))
2997     FilterSections.push_back("__TEXT,__text");
2998   Verbose = InputArgs.hasArg(OTOOL_v) || InputArgs.hasArg(OTOOL_V) ||
2999             InputArgs.hasArg(OTOOL_o);
3000   SymbolicOperands = InputArgs.hasArg(OTOOL_V);
3001   if (InputArgs.hasArg(OTOOL_x))
3002     FilterSections.push_back(",__text");
3003   LeadingAddr = LeadingHeaders = !InputArgs.hasArg(OTOOL_X);
3004 
3005   ChainedFixups = InputArgs.hasArg(OTOOL_chained_fixups);
3006   DyldInfo = InputArgs.hasArg(OTOOL_dyld_info);
3007 
3008   InputFilenames = InputArgs.getAllArgValues(OTOOL_INPUT);
3009   if (InputFilenames.empty())
3010     reportCmdLineError("no input file");
3011 
3012   for (const Arg *A : InputArgs) {
3013     const Option &O = A->getOption();
3014     if (O.getGroup().isValid() && O.getGroup().getID() == OTOOL_grp_obsolete) {
3015       reportCmdLineWarning(O.getPrefixedName() +
3016                            " is obsolete and not implemented");
3017     }
3018   }
3019 }
3020 
parseObjdumpOptions(const llvm::opt::InputArgList & InputArgs)3021 static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
3022   parseIntArg(InputArgs, OBJDUMP_adjust_vma_EQ, AdjustVMA);
3023   AllHeaders = InputArgs.hasArg(OBJDUMP_all_headers);
3024   ArchName = InputArgs.getLastArgValue(OBJDUMP_arch_name_EQ).str();
3025   ArchiveHeaders = InputArgs.hasArg(OBJDUMP_archive_headers);
3026   Demangle = InputArgs.hasArg(OBJDUMP_demangle);
3027   Disassemble = InputArgs.hasArg(OBJDUMP_disassemble);
3028   DisassembleAll = InputArgs.hasArg(OBJDUMP_disassemble_all);
3029   SymbolDescription = InputArgs.hasArg(OBJDUMP_symbol_description);
3030   DisassembleSymbols =
3031       commaSeparatedValues(InputArgs, OBJDUMP_disassemble_symbols_EQ);
3032   DisassembleZeroes = InputArgs.hasArg(OBJDUMP_disassemble_zeroes);
3033   if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_dwarf_EQ)) {
3034     DwarfDumpType = StringSwitch<DIDumpType>(A->getValue())
3035                         .Case("frames", DIDT_DebugFrame)
3036                         .Default(DIDT_Null);
3037     if (DwarfDumpType == DIDT_Null)
3038       invalidArgValue(A);
3039   }
3040   DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc);
3041   FaultMapSection = InputArgs.hasArg(OBJDUMP_fault_map_section);
3042   Offloading = InputArgs.hasArg(OBJDUMP_offloading);
3043   FileHeaders = InputArgs.hasArg(OBJDUMP_file_headers);
3044   SectionContents = InputArgs.hasArg(OBJDUMP_full_contents);
3045   PrintLines = InputArgs.hasArg(OBJDUMP_line_numbers);
3046   InputFilenames = InputArgs.getAllArgValues(OBJDUMP_INPUT);
3047   MachOOpt = InputArgs.hasArg(OBJDUMP_macho);
3048   MCPU = InputArgs.getLastArgValue(OBJDUMP_mcpu_EQ).str();
3049   MAttrs = commaSeparatedValues(InputArgs, OBJDUMP_mattr_EQ);
3050   ShowRawInsn = !InputArgs.hasArg(OBJDUMP_no_show_raw_insn);
3051   LeadingAddr = !InputArgs.hasArg(OBJDUMP_no_leading_addr);
3052   RawClangAST = InputArgs.hasArg(OBJDUMP_raw_clang_ast);
3053   Relocations = InputArgs.hasArg(OBJDUMP_reloc);
3054   PrintImmHex =
3055       InputArgs.hasFlag(OBJDUMP_print_imm_hex, OBJDUMP_no_print_imm_hex, true);
3056   PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers);
3057   FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ);
3058   SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers);
3059   ShowAllSymbols = InputArgs.hasArg(OBJDUMP_show_all_symbols);
3060   ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma);
3061   PrintSource = InputArgs.hasArg(OBJDUMP_source);
3062   parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress);
3063   HasStartAddressFlag = InputArgs.hasArg(OBJDUMP_start_address_EQ);
3064   parseIntArg(InputArgs, OBJDUMP_stop_address_EQ, StopAddress);
3065   HasStopAddressFlag = InputArgs.hasArg(OBJDUMP_stop_address_EQ);
3066   SymbolTable = InputArgs.hasArg(OBJDUMP_syms);
3067   SymbolizeOperands = InputArgs.hasArg(OBJDUMP_symbolize_operands);
3068   DynamicSymbolTable = InputArgs.hasArg(OBJDUMP_dynamic_syms);
3069   TripleName = InputArgs.getLastArgValue(OBJDUMP_triple_EQ).str();
3070   UnwindInfo = InputArgs.hasArg(OBJDUMP_unwind_info);
3071   Wide = InputArgs.hasArg(OBJDUMP_wide);
3072   Prefix = InputArgs.getLastArgValue(OBJDUMP_prefix).str();
3073   parseIntArg(InputArgs, OBJDUMP_prefix_strip, PrefixStrip);
3074   if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_debug_vars_EQ)) {
3075     DbgVariables = StringSwitch<DebugVarsFormat>(A->getValue())
3076                        .Case("ascii", DVASCII)
3077                        .Case("unicode", DVUnicode)
3078                        .Default(DVInvalid);
3079     if (DbgVariables == DVInvalid)
3080       invalidArgValue(A);
3081   }
3082   parseIntArg(InputArgs, OBJDUMP_debug_vars_indent_EQ, DbgIndent);
3083 
3084   parseMachOOptions(InputArgs);
3085 
3086   // Parse -M (--disassembler-options) and deprecated
3087   // --x86-asm-syntax={att,intel}.
3088   //
3089   // Note, for x86, the asm dialect (AssemblerDialect) is initialized when the
3090   // MCAsmInfo is constructed. MCInstPrinter::applyTargetSpecificCLOption is
3091   // called too late. For now we have to use the internal cl::opt option.
3092   const char *AsmSyntax = nullptr;
3093   for (const auto *A : InputArgs.filtered(OBJDUMP_disassembler_options_EQ,
3094                                           OBJDUMP_x86_asm_syntax_att,
3095                                           OBJDUMP_x86_asm_syntax_intel)) {
3096     switch (A->getOption().getID()) {
3097     case OBJDUMP_x86_asm_syntax_att:
3098       AsmSyntax = "--x86-asm-syntax=att";
3099       continue;
3100     case OBJDUMP_x86_asm_syntax_intel:
3101       AsmSyntax = "--x86-asm-syntax=intel";
3102       continue;
3103     }
3104 
3105     SmallVector<StringRef, 2> Values;
3106     llvm::SplitString(A->getValue(), Values, ",");
3107     for (StringRef V : Values) {
3108       if (V == "att")
3109         AsmSyntax = "--x86-asm-syntax=att";
3110       else if (V == "intel")
3111         AsmSyntax = "--x86-asm-syntax=intel";
3112       else
3113         DisassemblerOptions.push_back(V.str());
3114     }
3115   }
3116   if (AsmSyntax) {
3117     const char *Argv[] = {"llvm-objdump", AsmSyntax};
3118     llvm::cl::ParseCommandLineOptions(2, Argv);
3119   }
3120 
3121   // Look up any provided build IDs, then append them to the input filenames.
3122   for (const opt::Arg *A : InputArgs.filtered(OBJDUMP_build_id)) {
3123     object::BuildID BuildID = parseBuildIDArg(A);
3124     std::optional<std::string> Path = BIDFetcher->fetch(BuildID);
3125     if (!Path) {
3126       reportCmdLineError(A->getSpelling() + ": could not find build ID '" +
3127                          A->getValue() + "'");
3128     }
3129     InputFilenames.push_back(std::move(*Path));
3130   }
3131 
3132   // objdump defaults to a.out if no filenames specified.
3133   if (InputFilenames.empty())
3134     InputFilenames.push_back("a.out");
3135 }
3136 
main(int argc,char ** argv)3137 int main(int argc, char **argv) {
3138   using namespace llvm;
3139   InitLLVM X(argc, argv);
3140 
3141   ToolName = argv[0];
3142   std::unique_ptr<CommonOptTable> T;
3143   OptSpecifier Unknown, HelpFlag, HelpHiddenFlag, VersionFlag;
3144 
3145   StringRef Stem = sys::path::stem(ToolName);
3146   auto Is = [=](StringRef Tool) {
3147     // We need to recognize the following filenames:
3148     //
3149     // llvm-objdump -> objdump
3150     // llvm-otool-10.exe -> otool
3151     // powerpc64-unknown-freebsd13-objdump -> objdump
3152     auto I = Stem.rfind_insensitive(Tool);
3153     return I != StringRef::npos &&
3154            (I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()]));
3155   };
3156   if (Is("otool")) {
3157     T = std::make_unique<OtoolOptTable>();
3158     Unknown = OTOOL_UNKNOWN;
3159     HelpFlag = OTOOL_help;
3160     HelpHiddenFlag = OTOOL_help_hidden;
3161     VersionFlag = OTOOL_version;
3162   } else {
3163     T = std::make_unique<ObjdumpOptTable>();
3164     Unknown = OBJDUMP_UNKNOWN;
3165     HelpFlag = OBJDUMP_help;
3166     HelpHiddenFlag = OBJDUMP_help_hidden;
3167     VersionFlag = OBJDUMP_version;
3168   }
3169 
3170   BumpPtrAllocator A;
3171   StringSaver Saver(A);
3172   opt::InputArgList InputArgs =
3173       T->parseArgs(argc, argv, Unknown, Saver,
3174                    [&](StringRef Msg) { reportCmdLineError(Msg); });
3175 
3176   if (InputArgs.size() == 0 || InputArgs.hasArg(HelpFlag)) {
3177     T->printHelp(ToolName);
3178     return 0;
3179   }
3180   if (InputArgs.hasArg(HelpHiddenFlag)) {
3181     T->printHelp(ToolName, /*ShowHidden=*/true);
3182     return 0;
3183   }
3184 
3185   // Initialize targets and assembly printers/parsers.
3186   InitializeAllTargetInfos();
3187   InitializeAllTargetMCs();
3188   InitializeAllDisassemblers();
3189 
3190   if (InputArgs.hasArg(VersionFlag)) {
3191     cl::PrintVersionMessage();
3192     if (!Is("otool")) {
3193       outs() << '\n';
3194       TargetRegistry::printRegisteredTargetsForVersion(outs());
3195     }
3196     return 0;
3197   }
3198 
3199   // Initialize debuginfod.
3200   const bool ShouldUseDebuginfodByDefault =
3201       InputArgs.hasArg(OBJDUMP_build_id) || canUseDebuginfod();
3202   std::vector<std::string> DebugFileDirectories =
3203       InputArgs.getAllArgValues(OBJDUMP_debug_file_directory);
3204   if (InputArgs.hasFlag(OBJDUMP_debuginfod, OBJDUMP_no_debuginfod,
3205                         ShouldUseDebuginfodByDefault)) {
3206     HTTPClient::initialize();
3207     BIDFetcher =
3208         std::make_unique<DebuginfodFetcher>(std::move(DebugFileDirectories));
3209   } else {
3210     BIDFetcher =
3211         std::make_unique<BuildIDFetcher>(std::move(DebugFileDirectories));
3212   }
3213 
3214   if (Is("otool"))
3215     parseOtoolOptions(InputArgs);
3216   else
3217     parseObjdumpOptions(InputArgs);
3218 
3219   if (StartAddress >= StopAddress)
3220     reportCmdLineError("start address should be less than stop address");
3221 
3222   // Removes trailing separators from prefix.
3223   while (!Prefix.empty() && sys::path::is_separator(Prefix.back()))
3224     Prefix.pop_back();
3225 
3226   if (AllHeaders)
3227     ArchiveHeaders = FileHeaders = PrivateHeaders = Relocations =
3228         SectionHeaders = SymbolTable = true;
3229 
3230   if (DisassembleAll || PrintSource || PrintLines ||
3231       !DisassembleSymbols.empty())
3232     Disassemble = true;
3233 
3234   if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null &&
3235       !DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST &&
3236       !Relocations && !SectionHeaders && !SectionContents && !SymbolTable &&
3237       !DynamicSymbolTable && !UnwindInfo && !FaultMapSection && !Offloading &&
3238       !(MachOOpt &&
3239         (Bind || DataInCode || ChainedFixups || DyldInfo || DylibId ||
3240          DylibsUsed || ExportsTrie || FirstPrivateHeader ||
3241          FunctionStartsType != FunctionStartsMode::None || IndirectSymbols ||
3242          InfoPlist || LazyBind || LinkOptHints || ObjcMetaData || Rebase ||
3243          Rpaths || UniversalHeaders || WeakBind || !FilterSections.empty()))) {
3244     T->printHelp(ToolName);
3245     return 2;
3246   }
3247 
3248   DisasmSymbolSet.insert(DisassembleSymbols.begin(), DisassembleSymbols.end());
3249 
3250   llvm::for_each(InputFilenames, dumpInput);
3251 
3252   warnOnNoMatchForSections();
3253 
3254   return EXIT_SUCCESS;
3255 }
3256