1 //===-- llvm-nm.cpp - Symbol table dumping utility for llvm ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This program is a utility that works like traditional Unix "nm", that is, it
10 // prints out the names of symbols in a bitcode or object file, along with some
11 // information about each symbol.
12 //
13 // This "nm" supports many of the features of GNU "nm", including its different
14 // output formats.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/BinaryFormat/COFF.h"
20 #include "llvm/BinaryFormat/MachO.h"
21 #include "llvm/BinaryFormat/XCOFF.h"
22 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
23 #include "llvm/Demangle/Demangle.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/Object/Archive.h"
27 #include "llvm/Object/COFF.h"
28 #include "llvm/Object/COFFImportFile.h"
29 #include "llvm/Object/ELFObjectFile.h"
30 #include "llvm/Object/IRObjectFile.h"
31 #include "llvm/Object/MachO.h"
32 #include "llvm/Object/MachOUniversal.h"
33 #include "llvm/Object/ObjectFile.h"
34 #include "llvm/Object/SymbolicFile.h"
35 #include "llvm/Object/TapiFile.h"
36 #include "llvm/Object/TapiUniversal.h"
37 #include "llvm/Object/Wasm.h"
38 #include "llvm/Object/XCOFFObjectFile.h"
39 #include "llvm/Option/Arg.h"
40 #include "llvm/Option/ArgList.h"
41 #include "llvm/Option/Option.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/FileSystem.h"
44 #include "llvm/Support/Format.h"
45 #include "llvm/Support/LLVMDriver.h"
46 #include "llvm/Support/MemoryBuffer.h"
47 #include "llvm/Support/Program.h"
48 #include "llvm/Support/Signals.h"
49 #include "llvm/Support/TargetSelect.h"
50 #include "llvm/Support/WithColor.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include "llvm/TargetParser/Host.h"
53 #include "llvm/TargetParser/Triple.h"
54 #include <vector>
55 
56 using namespace llvm;
57 using namespace object;
58 
namespace {
using namespace llvm::opt; // for HelpHidden in Opts.inc
// Option identifiers: every OPTION(...) entry pulled in from Opts.inc is
// expanded through LLVM_MAKE_OPT_ID into one enumerator.
enum ID {
  OPT_INVALID = 0, // This is not an option ID.
#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};

// For every PREFIX(NAME, VALUE) entry in Opts.inc, define an array
// NAME##_init initialized from VALUE and an ArrayRef called NAME that views
// all elements of that array except the last one.
#define PREFIX(NAME, VALUE)                                                    \
  static constexpr StringLiteral NAME##_init[] = VALUE;                        \
  static constexpr ArrayRef<StringLiteral> NAME(NAME##_init,                   \
                                                std::size(NAME##_init) - 1);
#include "Opts.inc"
#undef PREFIX

// One OptTable::Info record per OPTION(...) entry in Opts.inc, built with
// LLVM_CONSTRUCT_OPT_INFO.
static constexpr opt::OptTable::Info InfoTable[] = {
#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};

// Option table for this tool: wraps InfoTable and turns on grouped short
// options.
class NmOptTable : public opt::GenericOptTable {
public:
  NmOptTable() : opt::GenericOptTable(InfoTable) {
    setGroupedShortOptions(true);
  }
};

// Output layouts; printSymbolList() dispatches on the selected value.
enum OutputFormatTy { bsd, sysv, posix, darwin, just_symbols };
// Type of the BitMode setting; its use is outside this part of the file.
enum class BitModeTy { Bit32, Bit64, Bit32_64, Any };
} // namespace
91 
// File-level settings and state. Comments below describe only the uses that
// are visible in the printing/sorting helpers of this file.
static bool ArchiveMap;
static BitModeTy BitMode;
// Together with SpecialSyms, lets NMSymbol::shouldPrint() keep symbols
// flagged SF_FormatSpecific.
static bool DebugSyms;
// NMSymbol::shouldPrint() drops undefined symbols when set.
static bool DefinedOnly;
// printSymbolList() passes each name through demangle() when set.
static bool Demangle;
static bool DynamicSyms;
// Selects the (Name, Visibility) ordering in NMSymbol's operator<.
static bool ExportSymbols;
// NMSymbol::shouldPrint() drops non-global symbols when set.
static bool ExternalOnly;
// printSymbolList() appends "\tfile:line" via printLineNumbers() when set.
static bool LineNumbers;
// Layout used by printSymbolList() and writeFileName().
static OutputFormatTy OutputFormat;
static bool NoLLVMBitcode;
// sortSymbolList() leaves the list untouched when set.
static bool NoSort;
// NMSymbol::shouldPrint() drops weak symbols when set.
static bool NoWeakSymbols;
// Selects the (isDefined, Address, Name, Size) ordering in operator<.
static bool NumericSort;
// printSymbolList() prefixes every symbol line via writeFileName() and
// suppresses the per-file header when set.
static bool PrintFileName;
// The bsd layout prints the size column when set.
static bool PrintSize;
static bool Quiet;
// sortSymbolList() sorts in descending order when set.
static bool ReverseSort;
// See DebugSyms.
static bool SpecialSyms;
// Selects the (Size, Name, Address) ordering in operator<.
static bool SizeSort;
// NMSymbol::shouldPrint() drops defined symbols when set.
static bool UndefinedOnly;
static bool WithoutAliases;

// XCOFF-specific options.
static bool NoRsrc;

namespace {
// Number base for printed addresses and sizes: d = decimal, o = octal,
// x = hexadecimal (see the format selection in printSymbolList()).
enum Radix { d, o, x };
} // namespace
static Radix AddressRadix;

// Mach-O specific options.
static bool ArchAll = false;
static std::vector<StringRef> ArchFlags;
static bool AddDyldInfo;
static bool AddInlinedInfo;
static bool DyldInfoOnly;
// When set, darwinPrintSymbol() prints the raw nlist fields in hex.
static bool FormatMachOasHex;
static bool NoDyldInfo;
static std::vector<StringRef> SegSect;
static bool MachOPrintSizeWarning = false;

// Miscellaneous states.
// The bsd and darwin layouts print the address column only when set.
static bool PrintAddress = true;
// Enables the "<file>:" header in printSymbolList() for the bsd, posix and
// just_symbols layouts.
static bool MultipleFiles = false;
// Set by the error() overloads that report a message.
static bool HadError = false;

// Tool name passed to WithColor when emitting warnings and errors.
static StringRef ToolName;
140 
warn(Error Err,Twine FileName,Twine Context=Twine (),Twine Archive=Twine ())141 static void warn(Error Err, Twine FileName, Twine Context = Twine(),
142                  Twine Archive = Twine()) {
143   assert(Err);
144 
145   // Flush the standard output so that the warning isn't interleaved with other
146   // output if stdout and stderr are writing to the same place.
147   outs().flush();
148 
149   handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
150     WithColor::warning(errs(), ToolName)
151         << (Archive.str().empty() ? FileName : Archive + "(" + FileName + ")")
152         << ": " << (Context.str().empty() ? "" : Context + ": ") << EI.message()
153         << "\n";
154   });
155 }
156 
error(Twine Message,Twine Path=Twine ())157 static void error(Twine Message, Twine Path = Twine()) {
158   HadError = true;
159   WithColor::error(errs(), ToolName) << Path << ": " << Message << "\n";
160 }
161 
error(std::error_code EC,Twine Path=Twine ())162 static bool error(std::error_code EC, Twine Path = Twine()) {
163   if (EC) {
164     error(EC.message(), Path);
165     return true;
166   }
167   return false;
168 }
169 
170 // This version of error() prints the archive name and member name, for example:
171 // "libx.a(foo.o)" after the ToolName before the error message.  It sets
172 // HadError but returns allowing the code to move on to other archive members.
error(llvm::Error E,StringRef FileName,const Archive::Child & C,StringRef ArchitectureName=StringRef ())173 static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
174                   StringRef ArchitectureName = StringRef()) {
175   HadError = true;
176   WithColor::error(errs(), ToolName) << FileName;
177 
178   Expected<StringRef> NameOrErr = C.getName();
179   // TODO: if we have a error getting the name then it would be nice to print
180   // the index of which archive member this is and or its offset in the
181   // archive instead of "???" as the name.
182   if (!NameOrErr) {
183     consumeError(NameOrErr.takeError());
184     errs() << "(" << "???" << ")";
185   } else
186     errs() << "(" << NameOrErr.get() << ")";
187 
188   if (!ArchitectureName.empty())
189     errs() << " (for architecture " << ArchitectureName << ")";
190 
191   std::string Buf;
192   raw_string_ostream OS(Buf);
193   logAllUnhandledErrors(std::move(E), OS);
194   OS.flush();
195   errs() << ": " << Buf << "\n";
196 }
197 
198 // This version of error() prints the file name and which architecture slice it
199 // is from, for example: "foo.o (for architecture i386)" after the ToolName
200 // before the error message.  It sets HadError but returns allowing the code to
201 // move on to other architecture slices.
error(llvm::Error E,StringRef FileName,StringRef ArchitectureName=StringRef ())202 static void error(llvm::Error E, StringRef FileName,
203                   StringRef ArchitectureName = StringRef()) {
204   HadError = true;
205   WithColor::error(errs(), ToolName) << FileName;
206 
207   if (!ArchitectureName.empty())
208     errs() << " (for architecture " << ArchitectureName << ")";
209 
210   std::string Buf;
211   raw_string_ostream OS(Buf);
212   logAllUnhandledErrors(std::move(E), OS);
213   OS.flush();
214   errs() << ": " << Buf << "\n";
215 }
216 
217 namespace {
218 struct NMSymbol {
219   uint64_t Address;
220   uint64_t Size;
221   char TypeChar;
222   std::string Name;
223   StringRef SectionName;
224   StringRef TypeName;
225   BasicSymbolRef Sym;
226   StringRef Visibility;
227 
228   // The Sym field above points to the native symbol in the object file,
229   // for Mach-O when we are creating symbols from the dyld info the above
230   // pointer is null as there is no native symbol.  In these cases the fields
231   // below are filled in to represent what would have been a Mach-O nlist
232   // native symbol.
233   uint32_t SymFlags;
234   SectionRef Section;
235   uint8_t NType;
236   uint8_t NSect;
237   uint16_t NDesc;
238   std::string IndirectName;
239 
isDefined__anondb275fe70411::NMSymbol240   bool isDefined() const {
241     if (Sym.getRawDataRefImpl().p)
242       return !(SymFlags & SymbolRef::SF_Undefined);
243     return TypeChar != 'U';
244   }
245 
initializeFlags__anondb275fe70411::NMSymbol246   bool initializeFlags(const SymbolicFile &Obj) {
247     Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
248     if (!SymFlagsOrErr) {
249       // TODO: Test this error.
250       error(SymFlagsOrErr.takeError(), Obj.getFileName());
251       return false;
252     }
253     SymFlags = *SymFlagsOrErr;
254     return true;
255   }
256 
shouldPrint__anondb275fe70411::NMSymbol257   bool shouldPrint() const {
258     bool Undefined = SymFlags & SymbolRef::SF_Undefined;
259     bool Global = SymFlags & SymbolRef::SF_Global;
260     bool Weak = SymFlags & SymbolRef::SF_Weak;
261     bool FormatSpecific = SymFlags & SymbolRef::SF_FormatSpecific;
262     if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) ||
263         (!Global && ExternalOnly) || (Weak && NoWeakSymbols) ||
264         (FormatSpecific && !(SpecialSyms || DebugSyms)))
265       return false;
266     return true;
267   }
268 };
269 
operator <(const NMSymbol & A,const NMSymbol & B)270 bool operator<(const NMSymbol &A, const NMSymbol &B) {
271   if (NumericSort)
272     return std::make_tuple(A.isDefined(), A.Address, A.Name, A.Size) <
273            std::make_tuple(B.isDefined(), B.Address, B.Name, B.Size);
274   if (SizeSort)
275     return std::make_tuple(A.Size, A.Name, A.Address) <
276            std::make_tuple(B.Size, B.Name, B.Address);
277   if (ExportSymbols)
278     return std::make_tuple(A.Name, A.Visibility) <
279            std::make_tuple(B.Name, B.Visibility);
280   return std::make_tuple(A.Name, A.Size, A.Address) <
281          std::make_tuple(B.Name, B.Size, B.Address);
282 }
283 
operator >(const NMSymbol & A,const NMSymbol & B)284 bool operator>(const NMSymbol &A, const NMSymbol &B) { return B < A; }
operator ==(const NMSymbol & A,const NMSymbol & B)285 bool operator==(const NMSymbol &A, const NMSymbol &B) {
286   return !(A < B) && !(B < A);
287 }
288 } // anonymous namespace
289 
// File name printed in the per-file headers of printSymbolList() and in the
// line prefix produced by writeFileName().
static StringRef CurrentFilename;

// Declared ahead of use; the definition is not in this part of the file.
static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
293 
294 // darwinPrintSymbol() is used to print a symbol from a Mach-O file when the
295 // the OutputFormat is darwin or we are printing Mach-O symbols in hex.  For
296 // the darwin format it produces the same output as darwin's nm(1) -m output
297 // and when printing Mach-O symbols in hex it produces the same output as
298 // darwin's nm(1) -x format.
darwinPrintSymbol(SymbolicFile & Obj,const NMSymbol & S,char * SymbolAddrStr,const char * printBlanks,const char * printDashes,const char * printFormat)299 static void darwinPrintSymbol(SymbolicFile &Obj, const NMSymbol &S,
300                               char *SymbolAddrStr, const char *printBlanks,
301                               const char *printDashes,
302                               const char *printFormat) {
303   MachO::mach_header H;
304   MachO::mach_header_64 H_64;
305   uint32_t Filetype = MachO::MH_OBJECT;
306   uint32_t Flags = 0;
307   uint8_t NType = 0;
308   uint8_t NSect = 0;
309   uint16_t NDesc = 0;
310   uint32_t NStrx = 0;
311   uint64_t NValue = 0;
312   MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
313   if (Obj.isIR()) {
314     uint32_t SymFlags = cantFail(S.Sym.getFlags());
315     if (SymFlags & SymbolRef::SF_Global)
316       NType |= MachO::N_EXT;
317     if (SymFlags & SymbolRef::SF_Hidden)
318       NType |= MachO::N_PEXT;
319     if (SymFlags & SymbolRef::SF_Undefined)
320       NType |= MachO::N_EXT | MachO::N_UNDF;
321     else {
322       // Here we have a symbol definition.  So to fake out a section name we
323       // use 1, 2 and 3 for section numbers.  See below where they are used to
324       // print out fake section names.
325       NType |= MachO::N_SECT;
326       if (SymFlags & SymbolRef::SF_Const)
327         NSect = 3;
328       else if (SymFlags & SymbolRef::SF_Executable)
329         NSect = 1;
330       else
331         NSect = 2;
332     }
333     if (SymFlags & SymbolRef::SF_Weak)
334       NDesc |= MachO::N_WEAK_DEF;
335   } else {
336     DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
337     if (MachO->is64Bit()) {
338       H_64 = MachO->MachOObjectFile::getHeader64();
339       Filetype = H_64.filetype;
340       Flags = H_64.flags;
341       if (SymDRI.p){
342         MachO::nlist_64 STE_64 = MachO->getSymbol64TableEntry(SymDRI);
343         NType = STE_64.n_type;
344         NSect = STE_64.n_sect;
345         NDesc = STE_64.n_desc;
346         NStrx = STE_64.n_strx;
347         NValue = STE_64.n_value;
348       } else {
349         NType = S.NType;
350         NSect = S.NSect;
351         NDesc = S.NDesc;
352         NStrx = 0;
353         NValue = S.Address;
354       }
355     } else {
356       H = MachO->MachOObjectFile::getHeader();
357       Filetype = H.filetype;
358       Flags = H.flags;
359       if (SymDRI.p){
360         MachO::nlist STE = MachO->getSymbolTableEntry(SymDRI);
361         NType = STE.n_type;
362         NSect = STE.n_sect;
363         NDesc = STE.n_desc;
364         NStrx = STE.n_strx;
365         NValue = STE.n_value;
366       } else {
367         NType = S.NType;
368         NSect = S.NSect;
369         NDesc = S.NDesc;
370         NStrx = 0;
371         NValue = S.Address;
372       }
373     }
374   }
375 
376   // If we are printing Mach-O symbols in hex do that and return.
377   if (FormatMachOasHex) {
378     outs() << format(printFormat, NValue) << ' '
379            << format("%02x %02x %04x %08x", NType, NSect, NDesc, NStrx) << ' '
380            << S.Name;
381     if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
382       outs() << " (indirect for ";
383       outs() << format(printFormat, NValue) << ' ';
384       StringRef IndirectName;
385       if (S.Sym.getRawDataRefImpl().p) {
386         if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
387           outs() << "?)";
388         else
389           outs() << IndirectName << ")";
390       } else
391         outs() << S.IndirectName << ")";
392     }
393     outs() << "\n";
394     return;
395   }
396 
397   if (PrintAddress) {
398     if ((NType & MachO::N_TYPE) == MachO::N_INDR)
399       strcpy(SymbolAddrStr, printBlanks);
400     if (Obj.isIR() && (NType & MachO::N_TYPE) == MachO::N_TYPE)
401       strcpy(SymbolAddrStr, printDashes);
402     outs() << SymbolAddrStr << ' ';
403   }
404 
405   switch (NType & MachO::N_TYPE) {
406   case MachO::N_UNDF:
407     if (NValue != 0) {
408       outs() << "(common) ";
409       if (MachO::GET_COMM_ALIGN(NDesc) != 0)
410         outs() << "(alignment 2^" << (int)MachO::GET_COMM_ALIGN(NDesc) << ") ";
411     } else {
412       if ((NType & MachO::N_TYPE) == MachO::N_PBUD)
413         outs() << "(prebound ";
414       else
415         outs() << "(";
416       if ((NDesc & MachO::REFERENCE_TYPE) ==
417           MachO::REFERENCE_FLAG_UNDEFINED_LAZY)
418         outs() << "undefined [lazy bound]) ";
419       else if ((NDesc & MachO::REFERENCE_TYPE) ==
420                MachO::REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY)
421         outs() << "undefined [private lazy bound]) ";
422       else if ((NDesc & MachO::REFERENCE_TYPE) ==
423                MachO::REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY)
424         outs() << "undefined [private]) ";
425       else
426         outs() << "undefined) ";
427     }
428     break;
429   case MachO::N_ABS:
430     outs() << "(absolute) ";
431     break;
432   case MachO::N_INDR:
433     outs() << "(indirect) ";
434     break;
435   case MachO::N_SECT: {
436     if (Obj.isIR()) {
437       // For llvm bitcode files print out a fake section name using the values
438       // use 1, 2 and 3 for section numbers as set above.
439       if (NSect == 1)
440         outs() << "(LTO,CODE) ";
441       else if (NSect == 2)
442         outs() << "(LTO,DATA) ";
443       else if (NSect == 3)
444         outs() << "(LTO,RODATA) ";
445       else
446         outs() << "(?,?) ";
447       break;
448     }
449     section_iterator Sec = SectionRef();
450     if (S.Sym.getRawDataRefImpl().p) {
451       Expected<section_iterator> SecOrErr =
452           MachO->getSymbolSection(S.Sym.getRawDataRefImpl());
453       if (!SecOrErr) {
454         consumeError(SecOrErr.takeError());
455         outs() << "(?,?) ";
456         break;
457       }
458       Sec = *SecOrErr;
459       if (Sec == MachO->section_end()) {
460         outs() << "(?,?) ";
461         break;
462       }
463     } else {
464       Sec = S.Section;
465     }
466     DataRefImpl Ref = Sec->getRawDataRefImpl();
467     StringRef SectionName;
468     if (Expected<StringRef> NameOrErr = MachO->getSectionName(Ref))
469       SectionName = *NameOrErr;
470     StringRef SegmentName = MachO->getSectionFinalSegmentName(Ref);
471     outs() << "(" << SegmentName << "," << SectionName << ") ";
472     break;
473   }
474   default:
475     outs() << "(?) ";
476     break;
477   }
478 
479   if (NType & MachO::N_EXT) {
480     if (NDesc & MachO::REFERENCED_DYNAMICALLY)
481       outs() << "[referenced dynamically] ";
482     if (NType & MachO::N_PEXT) {
483       if ((NDesc & MachO::N_WEAK_DEF) == MachO::N_WEAK_DEF)
484         outs() << "weak private external ";
485       else
486         outs() << "private external ";
487     } else {
488       if ((NDesc & MachO::N_WEAK_REF) == MachO::N_WEAK_REF ||
489           (NDesc & MachO::N_WEAK_DEF) == MachO::N_WEAK_DEF) {
490         if ((NDesc & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF)) ==
491             (MachO::N_WEAK_REF | MachO::N_WEAK_DEF))
492           outs() << "weak external automatically hidden ";
493         else
494           outs() << "weak external ";
495       } else
496         outs() << "external ";
497     }
498   } else {
499     if (NType & MachO::N_PEXT)
500       outs() << "non-external (was a private external) ";
501     else
502       outs() << "non-external ";
503   }
504 
505   if (Filetype == MachO::MH_OBJECT) {
506     if (NDesc & MachO::N_NO_DEAD_STRIP)
507       outs() << "[no dead strip] ";
508     if ((NType & MachO::N_TYPE) != MachO::N_UNDF &&
509         NDesc & MachO::N_SYMBOL_RESOLVER)
510       outs() << "[symbol resolver] ";
511     if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_ALT_ENTRY)
512       outs() << "[alt entry] ";
513     if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_COLD_FUNC)
514       outs() << "[cold func] ";
515   }
516 
517   if ((NDesc & MachO::N_ARM_THUMB_DEF) == MachO::N_ARM_THUMB_DEF)
518     outs() << "[Thumb] ";
519 
520   if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
521     outs() << S.Name << " (for ";
522     StringRef IndirectName;
523     if (MachO) {
524       if (S.Sym.getRawDataRefImpl().p) {
525         if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
526           outs() << "?)";
527         else
528           outs() << IndirectName << ")";
529       } else
530         outs() << S.IndirectName << ")";
531     } else
532       outs() << "?)";
533   } else
534     outs() << S.Name;
535 
536   if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
537       (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
538        (NType & MachO::N_TYPE) == MachO::N_PBUD)) {
539     uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
540     if (LibraryOrdinal != 0) {
541       if (LibraryOrdinal == MachO::EXECUTABLE_ORDINAL)
542         outs() << " (from executable)";
543       else if (LibraryOrdinal == MachO::DYNAMIC_LOOKUP_ORDINAL)
544         outs() << " (dynamically looked up)";
545       else {
546         StringRef LibraryName;
547         if (!MachO ||
548             MachO->getLibraryShortNameByIndex(LibraryOrdinal - 1, LibraryName))
549           outs() << " (from bad library ordinal " << LibraryOrdinal << ")";
550         else
551           outs() << " (from " << LibraryName << ")";
552       }
553     }
554   }
555 }
556 
557 // Table that maps Darwin's Mach-O stab constants to strings to allow printing.
558 struct DarwinStabName {
559   uint8_t NType;
560   const char *Name;
561 };
562 const struct DarwinStabName DarwinStabNames[] = {
563     {MachO::N_GSYM, "GSYM"},    {MachO::N_FNAME, "FNAME"},
564     {MachO::N_FUN, "FUN"},      {MachO::N_STSYM, "STSYM"},
565     {MachO::N_LCSYM, "LCSYM"},  {MachO::N_BNSYM, "BNSYM"},
566     {MachO::N_PC, "PC"},        {MachO::N_AST, "AST"},
567     {MachO::N_OPT, "OPT"},      {MachO::N_RSYM, "RSYM"},
568     {MachO::N_SLINE, "SLINE"},  {MachO::N_ENSYM, "ENSYM"},
569     {MachO::N_SSYM, "SSYM"},    {MachO::N_SO, "SO"},
570     {MachO::N_OSO, "OSO"},      {MachO::N_LIB, "LIB"},
571     {MachO::N_LSYM, "LSYM"},    {MachO::N_BINCL, "BINCL"},
572     {MachO::N_SOL, "SOL"},      {MachO::N_PARAMS, "PARAM"},
573     {MachO::N_VERSION, "VERS"}, {MachO::N_OLEVEL, "OLEV"},
574     {MachO::N_PSYM, "PSYM"},    {MachO::N_EINCL, "EINCL"},
575     {MachO::N_ENTRY, "ENTRY"},  {MachO::N_LBRAC, "LBRAC"},
576     {MachO::N_EXCL, "EXCL"},    {MachO::N_RBRAC, "RBRAC"},
577     {MachO::N_BCOMM, "BCOMM"},  {MachO::N_ECOMM, "ECOMM"},
578     {MachO::N_ECOML, "ECOML"},  {MachO::N_LENG, "LENG"},
579 };
580 
getDarwinStabString(uint8_t NType)581 static const char *getDarwinStabString(uint8_t NType) {
582   for (auto I : ArrayRef(DarwinStabNames))
583     if (I.NType == NType)
584       return I.Name;
585   return nullptr;
586 }
587 
588 // darwinPrintStab() prints the n_sect, n_desc along with a symbolic name of
589 // a stab n_type value in a Mach-O file.
darwinPrintStab(MachOObjectFile * MachO,const NMSymbol & S)590 static void darwinPrintStab(MachOObjectFile *MachO, const NMSymbol &S) {
591   MachO::nlist_64 STE_64;
592   MachO::nlist STE;
593   uint8_t NType;
594   uint8_t NSect;
595   uint16_t NDesc;
596   DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
597   if (MachO->is64Bit()) {
598     STE_64 = MachO->getSymbol64TableEntry(SymDRI);
599     NType = STE_64.n_type;
600     NSect = STE_64.n_sect;
601     NDesc = STE_64.n_desc;
602   } else {
603     STE = MachO->getSymbolTableEntry(SymDRI);
604     NType = STE.n_type;
605     NSect = STE.n_sect;
606     NDesc = STE.n_desc;
607   }
608 
609   outs() << format(" %02x %04x ", NSect, NDesc);
610   if (const char *stabString = getDarwinStabString(NType))
611     outs() << format("%5.5s", stabString);
612   else
613     outs() << format("   %02x", NType);
614 }
615 
symbolIsDefined(const NMSymbol & Sym)616 static bool symbolIsDefined(const NMSymbol &Sym) {
617   return Sym.TypeChar != 'U' && Sym.TypeChar != 'w' && Sym.TypeChar != 'v';
618 }
619 
writeFileName(raw_ostream & S,StringRef ArchiveName,StringRef ArchitectureName)620 static void writeFileName(raw_ostream &S, StringRef ArchiveName,
621                           StringRef ArchitectureName) {
622   if (!ArchitectureName.empty())
623     S << "(for architecture " << ArchitectureName << "):";
624   if (OutputFormat == posix && !ArchiveName.empty())
625     S << ArchiveName << "[" << CurrentFilename << "]: ";
626   else {
627     if (!ArchiveName.empty())
628       S << ArchiveName << ":";
629     S << CurrentFilename << ": ";
630   }
631 }
632 
sortSymbolList(std::vector<NMSymbol> & SymbolList)633 static void sortSymbolList(std::vector<NMSymbol> &SymbolList) {
634   if (NoSort)
635     return;
636 
637   if (ReverseSort)
638     llvm::sort(SymbolList, std::greater<>());
639   else
640     llvm::sort(SymbolList);
641 }
642 
printExportSymbolList(const std::vector<NMSymbol> & SymbolList)643 static void printExportSymbolList(const std::vector<NMSymbol> &SymbolList) {
644   for (const NMSymbol &Sym : SymbolList) {
645     outs() << Sym.Name;
646     if (!Sym.Visibility.empty())
647       outs() << ' ' << Sym.Visibility;
648     outs() << '\n';
649   }
650 }
651 
printLineNumbers(symbolize::LLVMSymbolizer & Symbolizer,const NMSymbol & S)652 static void printLineNumbers(symbolize::LLVMSymbolizer &Symbolizer,
653                              const NMSymbol &S) {
654   const auto *Obj = dyn_cast<ObjectFile>(S.Sym.getObject());
655   if (!Obj)
656     return;
657   const SymbolRef Sym(S.Sym);
658   uint64_t SectionIndex = object::SectionedAddress::UndefSection;
659   section_iterator Sec = cantFail(Sym.getSection());
660   if (Sec != Obj->section_end())
661     SectionIndex = Sec->getIndex();
662   object::SectionedAddress Address = {cantFail(Sym.getAddress()), SectionIndex};
663 
664   std::string FileName;
665   uint32_t Line;
666   switch (S.TypeChar) {
667   // For undefined symbols, find the first relocation for that symbol with a
668   // line number.
669   case 'U': {
670     for (const SectionRef RelocsSec : Obj->sections()) {
671       if (RelocsSec.relocations().empty())
672         continue;
673       SectionRef TextSec = *cantFail(RelocsSec.getRelocatedSection());
674       if (!TextSec.isText())
675         continue;
676       for (const RelocationRef R : RelocsSec.relocations()) {
677         if (R.getSymbol() != Sym)
678           continue;
679         Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
680             *Obj, {TextSec.getAddress() + R.getOffset(), SectionIndex});
681         if (!ResOrErr) {
682           error(ResOrErr.takeError(), Obj->getFileName());
683           return;
684         }
685         if (ResOrErr->FileName == DILineInfo::BadString)
686           return;
687         FileName = std::move(ResOrErr->FileName);
688         Line = ResOrErr->Line;
689         break;
690       }
691       if (!FileName.empty())
692         break;
693     }
694     if (FileName.empty())
695       return;
696     break;
697   }
698   case 't':
699   case 'T': {
700     Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(*Obj, Address);
701     if (!ResOrErr) {
702       error(ResOrErr.takeError(), Obj->getFileName());
703       return;
704     }
705     if (ResOrErr->FileName == DILineInfo::BadString)
706       return;
707     FileName = std::move(ResOrErr->FileName);
708     Line = ResOrErr->Line;
709     break;
710   }
711   default: {
712     Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(*Obj, Address);
713     if (!ResOrErr) {
714       error(ResOrErr.takeError(), Obj->getFileName());
715       return;
716     }
717     if (ResOrErr->DeclFile.empty())
718       return;
719     FileName = std::move(ResOrErr->DeclFile);
720     Line = ResOrErr->DeclLine;
721     break;
722   }
723   }
724   outs() << '\t' << FileName << ':' << Line;
725 }
726 
printSymbolList(SymbolicFile & Obj,std::vector<NMSymbol> & SymbolList,bool printName,StringRef ArchiveName,StringRef ArchitectureName)727 static void printSymbolList(SymbolicFile &Obj,
728                             std::vector<NMSymbol> &SymbolList, bool printName,
729                             StringRef ArchiveName, StringRef ArchitectureName) {
730   std::optional<symbolize::LLVMSymbolizer> Symbolizer;
731   if (LineNumbers)
732     Symbolizer.emplace();
733 
734   if (!PrintFileName) {
735     if ((OutputFormat == bsd || OutputFormat == posix ||
736          OutputFormat == just_symbols) &&
737         MultipleFiles && printName) {
738       outs() << '\n' << CurrentFilename << ":\n";
739     } else if (OutputFormat == sysv) {
740       outs() << "\n\nSymbols from " << CurrentFilename << ":\n\n";
741       if (Obj.is64Bit())
742         outs() << "Name                  Value           Class        Type"
743                << "         Size             Line  Section\n";
744       else
745         outs() << "Name                  Value   Class        Type"
746                << "         Size     Line  Section\n";
747     }
748   }
749 
750   const char *printBlanks, *printDashes, *printFormat;
751   if (Obj.is64Bit()) {
752     printBlanks = "                ";
753     printDashes = "----------------";
754     switch (AddressRadix) {
755     case Radix::o:
756       printFormat = OutputFormat == posix ? "%" PRIo64 : "%016" PRIo64;
757       break;
758     case Radix::x:
759       printFormat = OutputFormat == posix ? "%" PRIx64 : "%016" PRIx64;
760       break;
761     default:
762       printFormat = OutputFormat == posix ? "%" PRId64 : "%016" PRId64;
763     }
764   } else {
765     printBlanks = "        ";
766     printDashes = "--------";
767     switch (AddressRadix) {
768     case Radix::o:
769       printFormat = OutputFormat == posix ? "%" PRIo64 : "%08" PRIo64;
770       break;
771     case Radix::x:
772       printFormat = OutputFormat == posix ? "%" PRIx64 : "%08" PRIx64;
773       break;
774     default:
775       printFormat = OutputFormat == posix ? "%" PRId64 : "%08" PRId64;
776     }
777   }
778 
779   for (const NMSymbol &S : SymbolList) {
780     if (!S.shouldPrint())
781       continue;
782 
783     std::string Name = S.Name;
784     MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
785     if (Demangle)
786       Name = demangle(Name);
787 
788     if (PrintFileName)
789       writeFileName(outs(), ArchiveName, ArchitectureName);
790     if ((OutputFormat == just_symbols ||
791          (UndefinedOnly && MachO && OutputFormat != darwin)) &&
792         OutputFormat != posix) {
793       outs() << Name << "\n";
794       continue;
795     }
796 
797     char SymbolAddrStr[23], SymbolSizeStr[23];
798 
799     // If the format is SysV or the symbol isn't defined, then print spaces.
800     if (OutputFormat == sysv || !symbolIsDefined(S)) {
801       if (OutputFormat == posix) {
802         format(printFormat, S.Address)
803             .print(SymbolAddrStr, sizeof(SymbolAddrStr));
804         format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
805       } else {
806         strcpy(SymbolAddrStr, printBlanks);
807         strcpy(SymbolSizeStr, printBlanks);
808       }
809     }
810 
811     if (symbolIsDefined(S)) {
812       // Otherwise, print the symbol address and size.
813       if (Obj.isIR())
814         strcpy(SymbolAddrStr, printDashes);
815       else if (MachO && S.TypeChar == 'I')
816         strcpy(SymbolAddrStr, printBlanks);
817       else
818         format(printFormat, S.Address)
819             .print(SymbolAddrStr, sizeof(SymbolAddrStr));
820       format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
821     }
822 
823     // If OutputFormat is darwin or we are printing Mach-O symbols in hex and
824     // we have a MachOObjectFile, call darwinPrintSymbol to print as darwin's
825     // nm(1) -m output or hex, else if OutputFormat is darwin or we are
826     // printing Mach-O symbols in hex and not a Mach-O object fall back to
827     // OutputFormat bsd (see below).
828     if ((OutputFormat == darwin || FormatMachOasHex) && (MachO || Obj.isIR())) {
829       darwinPrintSymbol(Obj, S, SymbolAddrStr, printBlanks, printDashes,
830                         printFormat);
831     } else if (OutputFormat == posix) {
832       outs() << Name << " " << S.TypeChar << " " << SymbolAddrStr << " "
833              << (MachO ? "0" : SymbolSizeStr);
834     } else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
835       if (PrintAddress)
836         outs() << SymbolAddrStr << ' ';
837       if (PrintSize)
838         outs() << SymbolSizeStr << ' ';
839       outs() << S.TypeChar;
840       if (S.TypeChar == '-' && MachO)
841         darwinPrintStab(MachO, S);
842       outs() << " " << Name;
843       if (S.TypeChar == 'I' && MachO) {
844         outs() << " (indirect for ";
845         if (S.Sym.getRawDataRefImpl().p) {
846           StringRef IndirectName;
847           if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
848             outs() << "?)";
849           else
850             outs() << IndirectName << ")";
851         } else
852           outs() << S.IndirectName << ")";
853       }
854     } else if (OutputFormat == sysv) {
855       outs() << left_justify(Name, 20) << "|" << SymbolAddrStr << "|   "
856              << S.TypeChar << "  |" << right_justify(S.TypeName, 18) << "|"
857              << SymbolSizeStr << "|     |" << S.SectionName;
858     }
859     if (LineNumbers)
860       printLineNumbers(*Symbolizer, S);
861     outs() << '\n';
862   }
863 
864   SymbolList.clear();
865 }
866 
getSymbolNMTypeChar(ELFObjectFileBase & Obj,basic_symbol_iterator I)867 static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
868                                 basic_symbol_iterator I) {
869   // OK, this is ELF
870   elf_symbol_iterator SymI(I);
871 
872   Expected<elf_section_iterator> SecIOrErr = SymI->getSection();
873   if (!SecIOrErr) {
874     consumeError(SecIOrErr.takeError());
875     return '?';
876   }
877 
878   uint8_t Binding = SymI->getBinding();
879   if (Binding == ELF::STB_GNU_UNIQUE)
880     return 'u';
881 
882   assert(Binding != ELF::STB_WEAK && "STB_WEAK not tested in calling function");
883   if (Binding != ELF::STB_GLOBAL && Binding != ELF::STB_LOCAL)
884     return '?';
885 
886   elf_section_iterator SecI = *SecIOrErr;
887   if (SecI != Obj.section_end()) {
888     uint32_t Type = SecI->getType();
889     uint64_t Flags = SecI->getFlags();
890     if (Flags & ELF::SHF_EXECINSTR)
891       return 't';
892     if (Type == ELF::SHT_NOBITS)
893       return 'b';
894     if (Flags & ELF::SHF_ALLOC)
895       return Flags & ELF::SHF_WRITE ? 'd' : 'r';
896 
897     auto NameOrErr = SecI->getName();
898     if (!NameOrErr) {
899       consumeError(NameOrErr.takeError());
900       return '?';
901     }
902     if ((*NameOrErr).starts_with(".debug"))
903       return 'N';
904     if (!(Flags & ELF::SHF_WRITE))
905       return 'n';
906   }
907 
908   return '?';
909 }
910 
getSymbolNMTypeChar(COFFObjectFile & Obj,symbol_iterator I)911 static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
912   COFFSymbolRef Symb = Obj.getCOFFSymbol(*I);
913   // OK, this is COFF.
914   symbol_iterator SymI(I);
915 
916   Expected<StringRef> Name = SymI->getName();
917   if (!Name) {
918     consumeError(Name.takeError());
919     return '?';
920   }
921 
922   char Ret = StringSwitch<char>(*Name)
923                  .StartsWith(".debug", 'N')
924                  .StartsWith(".sxdata", 'N')
925                  .Default('?');
926 
927   if (Ret != '?')
928     return Ret;
929 
930   uint32_t Characteristics = 0;
931   if (!COFF::isReservedSectionNumber(Symb.getSectionNumber())) {
932     Expected<section_iterator> SecIOrErr = SymI->getSection();
933     if (!SecIOrErr) {
934       consumeError(SecIOrErr.takeError());
935       return '?';
936     }
937     section_iterator SecI = *SecIOrErr;
938     const coff_section *Section = Obj.getCOFFSection(*SecI);
939     Characteristics = Section->Characteristics;
940     if (Expected<StringRef> NameOrErr = Obj.getSectionName(Section))
941       if (NameOrErr->starts_with(".idata"))
942         return 'i';
943   }
944 
945   switch (Symb.getSectionNumber()) {
946   case COFF::IMAGE_SYM_DEBUG:
947     return 'n';
948   default:
949     // Check section type.
950     if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
951       return 't';
952     if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
953       return Characteristics & COFF::IMAGE_SCN_MEM_WRITE ? 'd' : 'r';
954     if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
955       return 'b';
956     if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
957       return 'i';
958     // Check for section symbol.
959     if (Symb.isSectionDefinition())
960       return 's';
961   }
962 
963   return '?';
964 }
965 
getSymbolNMTypeChar(XCOFFObjectFile & Obj,symbol_iterator I)966 static char getSymbolNMTypeChar(XCOFFObjectFile &Obj, symbol_iterator I) {
967   Expected<uint32_t> TypeOrErr = I->getType();
968   if (!TypeOrErr) {
969     warn(TypeOrErr.takeError(), Obj.getFileName(),
970          "for symbol with index " +
971              Twine(Obj.getSymbolIndex(I->getRawDataRefImpl().p)));
972     return '?';
973   }
974 
975   uint32_t SymType = *TypeOrErr;
976 
977   if (SymType == SymbolRef::ST_File)
978     return 'f';
979 
980   // If the I->getSection() call would return an error, the earlier I->getType()
981   // call will already have returned the same error first.
982   section_iterator SecIter = cantFail(I->getSection());
983 
984   if (SecIter == Obj.section_end())
985     return '?';
986 
987   if (Obj.isDebugSection(SecIter->getRawDataRefImpl()))
988     return 'N';
989 
990   if (SecIter->isText())
991     return 't';
992 
993   if (SecIter->isData())
994     return 'd';
995 
996   if (SecIter->isBSS())
997     return 'b';
998 
999   return '?';
1000 }
1001 
getSymbolNMTypeChar(COFFImportFile & Obj)1002 static char getSymbolNMTypeChar(COFFImportFile &Obj) {
1003   switch (Obj.getCOFFImportHeader()->getType()) {
1004   case COFF::IMPORT_CODE:
1005     return 't';
1006   case COFF::IMPORT_DATA:
1007     return 'd';
1008   case COFF::IMPORT_CONST:
1009     return 'r';
1010   }
1011   return '?';
1012 }
1013 
getSymbolNMTypeChar(MachOObjectFile & Obj,basic_symbol_iterator I)1014 static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
1015   DataRefImpl Symb = I->getRawDataRefImpl();
1016   uint8_t NType = Obj.is64Bit() ? Obj.getSymbol64TableEntry(Symb).n_type
1017                                 : Obj.getSymbolTableEntry(Symb).n_type;
1018 
1019   if (NType & MachO::N_STAB)
1020     return '-';
1021 
1022   switch (NType & MachO::N_TYPE) {
1023   case MachO::N_ABS:
1024     return 's';
1025   case MachO::N_INDR:
1026     return 'i';
1027   case MachO::N_SECT: {
1028     Expected<section_iterator> SecOrErr = Obj.getSymbolSection(Symb);
1029     if (!SecOrErr) {
1030       consumeError(SecOrErr.takeError());
1031       return 's';
1032     }
1033     section_iterator Sec = *SecOrErr;
1034     if (Sec == Obj.section_end())
1035       return 's';
1036     DataRefImpl Ref = Sec->getRawDataRefImpl();
1037     StringRef SectionName;
1038     if (Expected<StringRef> NameOrErr = Obj.getSectionName(Ref))
1039       SectionName = *NameOrErr;
1040     StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref);
1041     if (Obj.is64Bit() && Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
1042         SegmentName == "__TEXT_EXEC" && SectionName == "__text")
1043       return 't';
1044     if (SegmentName == "__TEXT" && SectionName == "__text")
1045       return 't';
1046     if (SegmentName == "__DATA" && SectionName == "__data")
1047       return 'd';
1048     if (SegmentName == "__DATA" && SectionName == "__bss")
1049       return 'b';
1050     return 's';
1051   }
1052   }
1053 
1054   return '?';
1055 }
1056 
getSymbolNMTypeChar(TapiFile & Obj,basic_symbol_iterator I)1057 static char getSymbolNMTypeChar(TapiFile &Obj, basic_symbol_iterator I) {
1058   auto Type = cantFail(Obj.getSymbolType(I->getRawDataRefImpl()));
1059   switch (Type) {
1060   case SymbolRef::ST_Function:
1061     return 't';
1062   case SymbolRef::ST_Data:
1063     if (Obj.hasSegmentInfo())
1064       return 'd';
1065     [[fallthrough]];
1066   default:
1067     return 's';
1068   }
1069 }
1070 
getSymbolNMTypeChar(WasmObjectFile & Obj,basic_symbol_iterator I)1071 static char getSymbolNMTypeChar(WasmObjectFile &Obj, basic_symbol_iterator I) {
1072   uint32_t Flags = cantFail(I->getFlags());
1073   if (Flags & SymbolRef::SF_Executable)
1074     return 't';
1075   return 'd';
1076 }
1077 
getSymbolNMTypeChar(IRObjectFile & Obj,basic_symbol_iterator I)1078 static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I) {
1079   uint32_t Flags = cantFail(I->getFlags());
1080   // FIXME: should we print 'b'? At the IR level we cannot be sure if this
1081   // will be in bss or not, but we could approximate.
1082   if (Flags & SymbolRef::SF_Executable)
1083     return 't';
1084   else if (Triple(Obj.getTargetTriple()).isOSDarwin() &&
1085            (Flags & SymbolRef::SF_Const))
1086     return 's';
1087   else
1088     return 'd';
1089 }
1090 
isObject(SymbolicFile & Obj,basic_symbol_iterator I)1091 static bool isObject(SymbolicFile &Obj, basic_symbol_iterator I) {
1092   return isa<ELFObjectFileBase>(&Obj) &&
1093          elf_symbol_iterator(I)->getELFType() == ELF::STT_OBJECT;
1094 }
1095 
1096 // For ELF object files, Set TypeName to the symbol typename, to be printed
1097 // in the 'Type' column of the SYSV format output.
getNMTypeName(SymbolicFile & Obj,basic_symbol_iterator I)1098 static StringRef getNMTypeName(SymbolicFile &Obj, basic_symbol_iterator I) {
1099   if (isa<ELFObjectFileBase>(&Obj)) {
1100     elf_symbol_iterator SymI(I);
1101     return SymI->getELFTypeName();
1102   }
1103   return "";
1104 }
1105 
1106 // Return Posix nm class type tag (single letter), but also set SecName and
1107 // section and name, to be used in format=sysv output.
getNMSectionTagAndName(SymbolicFile & Obj,basic_symbol_iterator I,StringRef & SecName)1108 static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
1109                                    StringRef &SecName) {
1110   // Symbol Flags have been checked in the caller.
1111   uint32_t Symflags = cantFail(I->getFlags());
1112   if (ELFObjectFileBase *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj)) {
1113     if (Symflags & object::SymbolRef::SF_Absolute)
1114       SecName = "*ABS*";
1115     else if (Symflags & object::SymbolRef::SF_Common)
1116       SecName = "*COM*";
1117     else if (Symflags & object::SymbolRef::SF_Undefined)
1118       SecName = "*UND*";
1119     else {
1120       elf_symbol_iterator SymI(I);
1121       Expected<elf_section_iterator> SecIOrErr = SymI->getSection();
1122       if (!SecIOrErr) {
1123         consumeError(SecIOrErr.takeError());
1124         return '?';
1125       }
1126 
1127       if (*SecIOrErr == ELFObj->section_end())
1128         return '?';
1129 
1130       Expected<StringRef> NameOrErr = (*SecIOrErr)->getName();
1131       if (!NameOrErr) {
1132         consumeError(NameOrErr.takeError());
1133         return '?';
1134       }
1135       SecName = *NameOrErr;
1136     }
1137   }
1138 
1139   if (Symflags & object::SymbolRef::SF_Undefined) {
1140     if (isa<MachOObjectFile>(Obj) || !(Symflags & object::SymbolRef::SF_Weak))
1141       return 'U';
1142     return isObject(Obj, I) ? 'v' : 'w';
1143   }
1144   if (isa<ELFObjectFileBase>(&Obj))
1145     if (ELFSymbolRef(*I).getELFType() == ELF::STT_GNU_IFUNC)
1146       return 'i';
1147   if (!isa<MachOObjectFile>(Obj) && (Symflags & object::SymbolRef::SF_Weak))
1148     return isObject(Obj, I) ? 'V' : 'W';
1149 
1150   if (Symflags & object::SymbolRef::SF_Common)
1151     return 'C';
1152 
1153   char Ret = '?';
1154   if (Symflags & object::SymbolRef::SF_Absolute)
1155     Ret = 'a';
1156   else if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj))
1157     Ret = getSymbolNMTypeChar(*IR, I);
1158   else if (COFFObjectFile *COFF = dyn_cast<COFFObjectFile>(&Obj))
1159     Ret = getSymbolNMTypeChar(*COFF, I);
1160   else if (XCOFFObjectFile *XCOFF = dyn_cast<XCOFFObjectFile>(&Obj))
1161     Ret = getSymbolNMTypeChar(*XCOFF, I);
1162   else if (COFFImportFile *COFFImport = dyn_cast<COFFImportFile>(&Obj))
1163     Ret = getSymbolNMTypeChar(*COFFImport);
1164   else if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
1165     Ret = getSymbolNMTypeChar(*MachO, I);
1166   else if (WasmObjectFile *Wasm = dyn_cast<WasmObjectFile>(&Obj))
1167     Ret = getSymbolNMTypeChar(*Wasm, I);
1168   else if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj))
1169     Ret = getSymbolNMTypeChar(*Tapi, I);
1170   else if (ELFObjectFileBase *ELF = dyn_cast<ELFObjectFileBase>(&Obj)) {
1171     Ret = getSymbolNMTypeChar(*ELF, I);
1172     if (ELFSymbolRef(*I).getBinding() == ELF::STB_GNU_UNIQUE)
1173       return Ret;
1174   } else
1175     llvm_unreachable("unknown binary format");
1176 
1177   if (!(Symflags & object::SymbolRef::SF_Global))
1178     return Ret;
1179 
1180   return toupper(Ret);
1181 }
1182 
1183 // getNsectForSegSect() is used to implement the Mach-O "-s segname sectname"
1184 // option to dump only those symbols from that section in a Mach-O file.
1185 // It is called once for each Mach-O file from getSymbolNamesFromObject()
1186 // to get the section number for that named section from the command line
1187 // arguments. It returns the section number for that section in the Mach-O
1188 // file or zero it is not present.
getNsectForSegSect(MachOObjectFile * Obj)1189 static unsigned getNsectForSegSect(MachOObjectFile *Obj) {
1190   unsigned Nsect = 1;
1191   for (auto &S : Obj->sections()) {
1192     DataRefImpl Ref = S.getRawDataRefImpl();
1193     StringRef SectionName;
1194     if (Expected<StringRef> NameOrErr = Obj->getSectionName(Ref))
1195       SectionName = *NameOrErr;
1196     StringRef SegmentName = Obj->getSectionFinalSegmentName(Ref);
1197     if (SegmentName == SegSect[0] && SectionName == SegSect[1])
1198       return Nsect;
1199     Nsect++;
1200   }
1201   return 0;
1202 }
1203 
1204 // getNsectInMachO() is used to implement the Mach-O "-s segname sectname"
1205 // option to dump only those symbols from that section in a Mach-O file.
1206 // It is called once for each symbol in a Mach-O file from
1207 // getSymbolNamesFromObject() and returns the section number for that symbol
1208 // if it is in a section, else it returns 0.
getNsectInMachO(MachOObjectFile & Obj,BasicSymbolRef Sym)1209 static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) {
1210   DataRefImpl Symb = Sym.getRawDataRefImpl();
1211   if (Obj.is64Bit()) {
1212     MachO::nlist_64 STE = Obj.getSymbol64TableEntry(Symb);
1213     return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? STE.n_sect : 0;
1214   }
1215   MachO::nlist STE = Obj.getSymbolTableEntry(Symb);
1216   return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? STE.n_sect : 0;
1217 }
1218 
dumpSymbolsFromDLInfoMachO(MachOObjectFile & MachO,std::vector<NMSymbol> & SymbolList)1219 static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO,
1220                                        std::vector<NMSymbol> &SymbolList) {
1221   size_t I = SymbolList.size();
1222   std::string ExportsNameBuffer;
1223   raw_string_ostream EOS(ExportsNameBuffer);
1224   std::string BindsNameBuffer;
1225   raw_string_ostream BOS(BindsNameBuffer);
1226   std::string LazysNameBuffer;
1227   raw_string_ostream LOS(LazysNameBuffer);
1228   std::string WeaksNameBuffer;
1229   raw_string_ostream WOS(WeaksNameBuffer);
1230   std::string FunctionStartsNameBuffer;
1231   raw_string_ostream FOS(FunctionStartsNameBuffer);
1232 
1233   MachO::mach_header H;
1234   MachO::mach_header_64 H_64;
1235   uint32_t HFlags = 0;
1236   if (MachO.is64Bit()) {
1237     H_64 = MachO.MachOObjectFile::getHeader64();
1238     HFlags = H_64.flags;
1239   } else {
1240     H = MachO.MachOObjectFile::getHeader();
1241     HFlags = H.flags;
1242   }
1243   uint64_t BaseSegmentAddress = 0;
1244   for (const auto &Command : MachO.load_commands()) {
1245     if (Command.C.cmd == MachO::LC_SEGMENT) {
1246       MachO::segment_command Seg = MachO.getSegmentLoadCommand(Command);
1247       if (Seg.fileoff == 0 && Seg.filesize != 0) {
1248         BaseSegmentAddress = Seg.vmaddr;
1249         break;
1250       }
1251     } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
1252       MachO::segment_command_64 Seg = MachO.getSegment64LoadCommand(Command);
1253       if (Seg.fileoff == 0 && Seg.filesize != 0) {
1254         BaseSegmentAddress = Seg.vmaddr;
1255         break;
1256       }
1257     }
1258   }
1259   if (DyldInfoOnly || AddDyldInfo ||
1260       HFlags & MachO::MH_NLIST_OUTOFSYNC_WITH_DYLDINFO) {
1261     unsigned ExportsAdded = 0;
1262     Error Err = Error::success();
1263     for (const llvm::object::ExportEntry &Entry : MachO.exports(Err)) {
1264       bool found = false;
1265       bool ReExport = false;
1266       if (!DyldInfoOnly) {
1267         for (const NMSymbol &S : SymbolList)
1268           if (S.Address == Entry.address() + BaseSegmentAddress &&
1269               S.Name == Entry.name()) {
1270             found = true;
1271             break;
1272           }
1273       }
1274       if (!found) {
1275         NMSymbol S = {};
1276         S.Address = Entry.address() + BaseSegmentAddress;
1277         S.Size = 0;
1278         S.TypeChar = '\0';
1279         S.Name = Entry.name().str();
1280         // There is no symbol in the nlist symbol table for this so we set
1281         // Sym effectivly to null and the rest of code in here must test for
1282         // it and not do things like Sym.getFlags() for it.
1283         S.Sym = BasicSymbolRef();
1284         S.SymFlags = SymbolRef::SF_Global;
1285         S.Section = SectionRef();
1286         S.NType = 0;
1287         S.NSect = 0;
1288         S.NDesc = 0;
1289 
1290         uint64_t EFlags = Entry.flags();
1291         bool Abs = ((EFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) ==
1292                     MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE);
1293         bool Resolver = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER);
1294         ReExport = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT);
1295         bool WeakDef = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1296         if (WeakDef)
1297           S.NDesc |= MachO::N_WEAK_DEF;
1298         if (Abs) {
1299           S.NType = MachO::N_EXT | MachO::N_ABS;
1300           S.TypeChar = 'A';
1301         } else if (ReExport) {
1302           S.NType = MachO::N_EXT | MachO::N_INDR;
1303           S.TypeChar = 'I';
1304         } else {
1305           S.NType = MachO::N_EXT | MachO::N_SECT;
1306           if (Resolver) {
1307             S.Address = Entry.other() + BaseSegmentAddress;
1308             if ((S.Address & 1) != 0 && !MachO.is64Bit() &&
1309                 H.cputype == MachO::CPU_TYPE_ARM) {
1310               S.Address &= ~1LL;
1311               S.NDesc |= MachO::N_ARM_THUMB_DEF;
1312             }
1313           } else {
1314             S.Address = Entry.address() + BaseSegmentAddress;
1315           }
1316           StringRef SegmentName = StringRef();
1317           StringRef SectionName = StringRef();
1318           for (const SectionRef &Section : MachO.sections()) {
1319             S.NSect++;
1320 
1321             if (Expected<StringRef> NameOrErr = Section.getName())
1322               SectionName = *NameOrErr;
1323             else
1324               consumeError(NameOrErr.takeError());
1325 
1326             SegmentName =
1327                 MachO.getSectionFinalSegmentName(Section.getRawDataRefImpl());
1328             if (S.Address >= Section.getAddress() &&
1329                 S.Address < Section.getAddress() + Section.getSize()) {
1330               S.Section = Section;
1331               break;
1332             } else if (Entry.name() == "__mh_execute_header" &&
1333                        SegmentName == "__TEXT" && SectionName == "__text") {
1334               S.Section = Section;
1335               S.NDesc |= MachO::REFERENCED_DYNAMICALLY;
1336               break;
1337             }
1338           }
1339           if (SegmentName == "__TEXT" && SectionName == "__text")
1340             S.TypeChar = 'T';
1341           else if (SegmentName == "__DATA" && SectionName == "__data")
1342             S.TypeChar = 'D';
1343           else if (SegmentName == "__DATA" && SectionName == "__bss")
1344             S.TypeChar = 'B';
1345           else
1346             S.TypeChar = 'S';
1347         }
1348         SymbolList.push_back(S);
1349 
1350         EOS << Entry.name();
1351         EOS << '\0';
1352         ExportsAdded++;
1353 
1354         // For ReExports there are a two more things to do, first add the
1355         // indirect name and second create the undefined symbol using the
1356         // referened dynamic library.
1357         if (ReExport) {
1358 
1359           // Add the indirect name.
1360           if (Entry.otherName().empty())
1361             EOS << Entry.name();
1362           else
1363             EOS << Entry.otherName();
1364           EOS << '\0';
1365 
1366           // Now create the undefined symbol using the referened dynamic
1367           // library.
1368           NMSymbol U = {};
1369           U.Address = 0;
1370           U.Size = 0;
1371           U.TypeChar = 'U';
1372           if (Entry.otherName().empty())
1373             U.Name = Entry.name().str();
1374           else
1375             U.Name = Entry.otherName().str();
1376           // Again there is no symbol in the nlist symbol table for this so
1377           // we set Sym effectivly to null and the rest of code in here must
1378           // test for it and not do things like Sym.getFlags() for it.
1379           U.Sym = BasicSymbolRef();
1380           U.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1381           U.Section = SectionRef();
1382           U.NType = MachO::N_EXT | MachO::N_UNDF;
1383           U.NSect = 0;
1384           U.NDesc = 0;
1385           // The library ordinal for this undefined symbol is in the export
1386           // trie Entry.other().
1387           MachO::SET_LIBRARY_ORDINAL(U.NDesc, Entry.other());
1388           SymbolList.push_back(U);
1389 
1390           // Finally add the undefined symbol's name.
1391           if (Entry.otherName().empty())
1392             EOS << Entry.name();
1393           else
1394             EOS << Entry.otherName();
1395           EOS << '\0';
1396           ExportsAdded++;
1397         }
1398       }
1399     }
1400     if (Err)
1401       error(std::move(Err), MachO.getFileName());
1402     // Set the symbol names and indirect names for the added symbols.
1403     if (ExportsAdded) {
1404       EOS.flush();
1405       const char *Q = ExportsNameBuffer.c_str();
1406       for (unsigned K = 0; K < ExportsAdded; K++) {
1407         SymbolList[I].Name = Q;
1408         Q += strlen(Q) + 1;
1409         if (SymbolList[I].TypeChar == 'I') {
1410           SymbolList[I].IndirectName = Q;
1411           Q += strlen(Q) + 1;
1412         }
1413         I++;
1414       }
1415     }
1416 
1417     // Add the undefined symbols from the bind entries.
1418     unsigned BindsAdded = 0;
1419     Error BErr = Error::success();
1420     StringRef LastSymbolName = StringRef();
1421     for (const llvm::object::MachOBindEntry &Entry : MachO.bindTable(BErr)) {
1422       bool found = false;
1423       if (LastSymbolName == Entry.symbolName())
1424         found = true;
1425       else if (!DyldInfoOnly) {
1426         for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1427           if (SymbolList[J].Name == Entry.symbolName())
1428             found = true;
1429         }
1430       }
1431       if (!found) {
1432         LastSymbolName = Entry.symbolName();
1433         NMSymbol B = {};
1434         B.Address = 0;
1435         B.Size = 0;
1436         B.TypeChar = 'U';
1437         // There is no symbol in the nlist symbol table for this so we set
1438         // Sym effectivly to null and the rest of code in here must test for
1439         // it and not do things like Sym.getFlags() for it.
1440         B.Sym = BasicSymbolRef();
1441         B.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1442         B.NType = MachO::N_EXT | MachO::N_UNDF;
1443         B.NSect = 0;
1444         B.NDesc = 0;
1445         MachO::SET_LIBRARY_ORDINAL(B.NDesc, Entry.ordinal());
1446         B.Name = Entry.symbolName().str();
1447         SymbolList.push_back(B);
1448         BOS << Entry.symbolName();
1449         BOS << '\0';
1450         BindsAdded++;
1451       }
1452     }
1453     if (BErr)
1454       error(std::move(BErr), MachO.getFileName());
1455     // Set the symbol names and indirect names for the added symbols.
1456     if (BindsAdded) {
1457       BOS.flush();
1458       const char *Q = BindsNameBuffer.c_str();
1459       for (unsigned K = 0; K < BindsAdded; K++) {
1460         SymbolList[I].Name = Q;
1461         Q += strlen(Q) + 1;
1462         if (SymbolList[I].TypeChar == 'I') {
1463           SymbolList[I].IndirectName = Q;
1464           Q += strlen(Q) + 1;
1465         }
1466         I++;
1467       }
1468     }
1469 
1470     // Add the undefined symbols from the lazy bind entries.
1471     unsigned LazysAdded = 0;
1472     Error LErr = Error::success();
1473     LastSymbolName = StringRef();
1474     for (const llvm::object::MachOBindEntry &Entry :
1475          MachO.lazyBindTable(LErr)) {
1476       bool found = false;
1477       if (LastSymbolName == Entry.symbolName())
1478         found = true;
1479       else {
1480         // Here we must check to see it this symbol is already in the
1481         // SymbolList as it might have already have been added above via a
1482         // non-lazy (bind) entry.
1483         for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1484           if (SymbolList[J].Name == Entry.symbolName())
1485             found = true;
1486         }
1487       }
1488       if (!found) {
1489         LastSymbolName = Entry.symbolName();
1490         NMSymbol L = {};
1491         L.Name = Entry.symbolName().str();
1492         L.Address = 0;
1493         L.Size = 0;
1494         L.TypeChar = 'U';
1495         // There is no symbol in the nlist symbol table for this so we set
1496         // Sym effectivly to null and the rest of code in here must test for
1497         // it and not do things like Sym.getFlags() for it.
1498         L.Sym = BasicSymbolRef();
1499         L.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1500         L.NType = MachO::N_EXT | MachO::N_UNDF;
1501         L.NSect = 0;
1502         // The REFERENCE_FLAG_UNDEFINED_LAZY is no longer used but here it
1503         // makes sence since we are creating this from a lazy bind entry.
1504         L.NDesc = MachO::REFERENCE_FLAG_UNDEFINED_LAZY;
1505         MachO::SET_LIBRARY_ORDINAL(L.NDesc, Entry.ordinal());
1506         SymbolList.push_back(L);
1507         LOS << Entry.symbolName();
1508         LOS << '\0';
1509         LazysAdded++;
1510       }
1511     }
1512     if (LErr)
1513       error(std::move(LErr), MachO.getFileName());
1514     // Set the symbol names and indirect names for the added symbols.
1515     if (LazysAdded) {
1516       LOS.flush();
1517       const char *Q = LazysNameBuffer.c_str();
1518       for (unsigned K = 0; K < LazysAdded; K++) {
1519         SymbolList[I].Name = Q;
1520         Q += strlen(Q) + 1;
1521         if (SymbolList[I].TypeChar == 'I') {
1522           SymbolList[I].IndirectName = Q;
1523           Q += strlen(Q) + 1;
1524         }
1525         I++;
1526       }
1527     }
1528 
1529     // Add the undefineds symbol from the weak bind entries which are not
1530     // strong symbols.
1531     unsigned WeaksAdded = 0;
1532     Error WErr = Error::success();
1533     LastSymbolName = StringRef();
1534     for (const llvm::object::MachOBindEntry &Entry :
1535          MachO.weakBindTable(WErr)) {
1536       bool found = false;
1537       unsigned J = 0;
1538       if (LastSymbolName == Entry.symbolName() ||
1539           Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) {
1540         found = true;
1541       } else {
1542         for (J = 0; J < SymbolList.size() && !found; ++J) {
1543           if (SymbolList[J].Name == Entry.symbolName()) {
1544             found = true;
1545             break;
1546           }
1547         }
1548       }
1549       if (!found) {
1550         LastSymbolName = Entry.symbolName();
1551         NMSymbol W = {};
1552         W.Name = Entry.symbolName().str();
1553         W.Address = 0;
1554         W.Size = 0;
1555         W.TypeChar = 'U';
1556         // There is no symbol in the nlist symbol table for this so we set
1557         // Sym effectivly to null and the rest of code in here must test for
1558         // it and not do things like Sym.getFlags() for it.
1559         W.Sym = BasicSymbolRef();
1560         W.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1561         W.NType = MachO::N_EXT | MachO::N_UNDF;
1562         W.NSect = 0;
1563         // Odd that we are using N_WEAK_DEF on an undefined symbol but that is
1564         // what is created in this case by the linker when there are real
1565         // symbols in the nlist structs.
1566         W.NDesc = MachO::N_WEAK_DEF;
1567         SymbolList.push_back(W);
1568         WOS << Entry.symbolName();
1569         WOS << '\0';
1570         WeaksAdded++;
1571       } else {
1572         // This is the case the symbol was previously been found and it could
1573         // have been added from a bind or lazy bind symbol.  If so and not
1574         // a definition also mark it as weak.
1575         if (SymbolList[J].TypeChar == 'U')
1576           // See comment above about N_WEAK_DEF.
1577           SymbolList[J].NDesc |= MachO::N_WEAK_DEF;
1578       }
1579     }
1580     if (WErr)
1581       error(std::move(WErr), MachO.getFileName());
1582     // Set the symbol names and indirect names for the added symbols.
1583     if (WeaksAdded) {
1584       WOS.flush();
1585       const char *Q = WeaksNameBuffer.c_str();
1586       for (unsigned K = 0; K < WeaksAdded; K++) {
1587         SymbolList[I].Name = Q;
1588         Q += strlen(Q) + 1;
1589         if (SymbolList[I].TypeChar == 'I') {
1590           SymbolList[I].IndirectName = Q;
1591           Q += strlen(Q) + 1;
1592         }
1593         I++;
1594       }
1595     }
1596 
1597     // Trying adding symbol from the function starts table and LC_MAIN entry
1598     // point.
1599     SmallVector<uint64_t, 8> FoundFns;
1600     uint64_t lc_main_offset = UINT64_MAX;
1601     for (const auto &Command : MachO.load_commands()) {
1602       if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
1603         // We found a function starts segment, parse the addresses for
1604         // consumption.
1605         MachO::linkedit_data_command LLC =
1606             MachO.getLinkeditDataLoadCommand(Command);
1607 
1608         MachO.ReadULEB128s(LLC.dataoff, FoundFns);
1609       } else if (Command.C.cmd == MachO::LC_MAIN) {
1610         MachO::entry_point_command LCmain = MachO.getEntryPointCommand(Command);
1611         lc_main_offset = LCmain.entryoff;
1612       }
1613     }
1614     // See if these addresses are already in the symbol table.
1615     unsigned FunctionStartsAdded = 0;
1616     for (uint64_t f = 0; f < FoundFns.size(); f++) {
1617       bool found = false;
1618       for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1619         if (SymbolList[J].Address == FoundFns[f] + BaseSegmentAddress)
1620           found = true;
1621       }
1622       // See this address is not already in the symbol table fake up an
1623       // nlist for it.
1624       if (!found) {
1625         NMSymbol F = {};
1626         F.Name = "<redacted function X>";
1627         F.Address = FoundFns[f] + BaseSegmentAddress;
1628         F.Size = 0;
1629         // There is no symbol in the nlist symbol table for this so we set
1630         // Sym effectivly to null and the rest of code in here must test for
1631         // it and not do things like Sym.getFlags() for it.
1632         F.Sym = BasicSymbolRef();
1633         F.SymFlags = 0;
1634         F.NType = MachO::N_SECT;
1635         F.NSect = 0;
1636         StringRef SegmentName = StringRef();
1637         StringRef SectionName = StringRef();
1638         for (const SectionRef &Section : MachO.sections()) {
1639           if (Expected<StringRef> NameOrErr = Section.getName())
1640             SectionName = *NameOrErr;
1641           else
1642             consumeError(NameOrErr.takeError());
1643 
1644           SegmentName =
1645               MachO.getSectionFinalSegmentName(Section.getRawDataRefImpl());
1646           F.NSect++;
1647           if (F.Address >= Section.getAddress() &&
1648               F.Address < Section.getAddress() + Section.getSize()) {
1649             F.Section = Section;
1650             break;
1651           }
1652         }
1653         if (SegmentName == "__TEXT" && SectionName == "__text")
1654           F.TypeChar = 't';
1655         else if (SegmentName == "__DATA" && SectionName == "__data")
1656           F.TypeChar = 'd';
1657         else if (SegmentName == "__DATA" && SectionName == "__bss")
1658           F.TypeChar = 'b';
1659         else
1660           F.TypeChar = 's';
1661         F.NDesc = 0;
1662         SymbolList.push_back(F);
1663         if (FoundFns[f] == lc_main_offset)
1664           FOS << "<redacted LC_MAIN>";
1665         else
1666           FOS << "<redacted function " << f << ">";
1667         FOS << '\0';
1668         FunctionStartsAdded++;
1669       }
1670     }
1671     if (FunctionStartsAdded) {
1672       FOS.flush();
1673       const char *Q = FunctionStartsNameBuffer.c_str();
1674       for (unsigned K = 0; K < FunctionStartsAdded; K++) {
1675         SymbolList[I].Name = Q;
1676         Q += strlen(Q) + 1;
1677         if (SymbolList[I].TypeChar == 'I') {
1678           SymbolList[I].IndirectName = Q;
1679           Q += strlen(Q) + 1;
1680         }
1681         I++;
1682       }
1683     }
1684   }
1685 }
1686 
shouldDump(SymbolicFile & Obj)1687 static bool shouldDump(SymbolicFile &Obj) {
1688   // The -X option is currently only implemented for XCOFF, ELF, and IR object
1689   // files. The option isn't fundamentally impossible with other formats, just
1690   // isn't implemented.
1691   if (!isa<XCOFFObjectFile>(Obj) && !isa<ELFObjectFileBase>(Obj) &&
1692       !isa<IRObjectFile>(Obj))
1693     return true;
1694 
1695   return Obj.is64Bit() ? BitMode != BitModeTy::Bit32
1696                        : BitMode != BitModeTy::Bit64;
1697 }
1698 
getXCOFFExports(XCOFFObjectFile * XCOFFObj,std::vector<NMSymbol> & SymbolList,StringRef ArchiveName)1699 static void getXCOFFExports(XCOFFObjectFile *XCOFFObj,
1700                             std::vector<NMSymbol> &SymbolList,
1701                             StringRef ArchiveName) {
1702   // Skip Shared object file.
1703   if (XCOFFObj->getFlags() & XCOFF::F_SHROBJ)
1704     return;
1705 
1706   for (SymbolRef Sym : XCOFFObj->symbols()) {
1707     // There is no visibility in old 32 bit XCOFF object file interpret.
1708     bool HasVisibilityAttr =
1709         XCOFFObj->is64Bit() || (XCOFFObj->auxiliaryHeader32() &&
1710                                 (XCOFFObj->auxiliaryHeader32()->getVersion() ==
1711                                  XCOFF::NEW_XCOFF_INTERPRET));
1712 
1713     if (HasVisibilityAttr) {
1714       XCOFFSymbolRef XCOFFSym = XCOFFObj->toSymbolRef(Sym.getRawDataRefImpl());
1715       uint16_t SymType = XCOFFSym.getSymbolType();
1716       if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_INTERNAL)
1717         continue;
1718       if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_HIDDEN)
1719         continue;
1720     }
1721 
1722     Expected<section_iterator> SymSecOrErr = Sym.getSection();
1723     if (!SymSecOrErr) {
1724       warn(SymSecOrErr.takeError(), XCOFFObj->getFileName(),
1725            "for symbol with index " +
1726                Twine(XCOFFObj->getSymbolIndex(Sym.getRawDataRefImpl().p)),
1727            ArchiveName);
1728       continue;
1729     }
1730     section_iterator SecIter = *SymSecOrErr;
1731     // If the symbol is not in a text or data section, it is not exported.
1732     if (SecIter == XCOFFObj->section_end())
1733       continue;
1734     if (!(SecIter->isText() || SecIter->isData() || SecIter->isBSS()))
1735       continue;
1736 
1737     StringRef SymName = cantFail(Sym.getName());
1738     if (SymName.empty())
1739       continue;
1740     if (SymName.starts_with("__sinit") || SymName.starts_with("__sterm") ||
1741         SymName.front() == '.' || SymName.front() == '(')
1742       continue;
1743 
1744     // Check the SymName regex matching with "^__[0-9]+__".
1745     if (SymName.size() > 4 && SymName.starts_with("__") &&
1746         SymName.ends_with("__")) {
1747       if (std::all_of(SymName.begin() + 2, SymName.end() - 2, isDigit))
1748         continue;
1749     }
1750 
1751     if (SymName == "__rsrc" && NoRsrc)
1752       continue;
1753 
1754     if (SymName.starts_with("__tf1"))
1755       SymName = SymName.substr(6);
1756     else if (SymName.starts_with("__tf9"))
1757       SymName = SymName.substr(14);
1758 
1759     NMSymbol S = {};
1760     S.Name = SymName.str();
1761     S.Sym = Sym;
1762 
1763     if (HasVisibilityAttr) {
1764       XCOFFSymbolRef XCOFFSym = XCOFFObj->toSymbolRef(Sym.getRawDataRefImpl());
1765       uint16_t SymType = XCOFFSym.getSymbolType();
1766       if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_PROTECTED)
1767         S.Visibility = "protected";
1768       else if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_EXPORTED)
1769         S.Visibility = "export";
1770     }
1771     if (S.initializeFlags(*XCOFFObj))
1772       SymbolList.push_back(S);
1773   }
1774 }
1775 
1776 static Expected<SymbolicFile::basic_symbol_iterator_range>
getDynamicSyms(SymbolicFile & Obj)1777 getDynamicSyms(SymbolicFile &Obj) {
1778   const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
1779   if (!E)
1780     return createError("File format has no dynamic symbol table");
1781   return E->getDynamicSymbolIterators();
1782 }
1783 
// Collects the symbols of Obj into SymbolList, honoring the command-line
// filters (dynamic symbols, debug/special symbols, aliases, Mach-O "-s"
// section selection, dyld info).
//
// Returns false if the dynamic symbol table was requested but is unavailable,
// if the flags of a symbol cannot be read, or if a "-s segname sectname"
// section was requested that does not exist in this Mach-O file. Returns true
// otherwise.
static bool getSymbolNamesFromObject(SymbolicFile &Obj,
                                     std::vector<NMSymbol> &SymbolList) {
  auto Symbols = Obj.symbols();
  std::vector<VersionEntry> SymbolVersions;

  // When dynamic symbols were requested, iterate over the dynamic symbol
  // table instead of the regular one and, for ELF, try to read the symbol
  // versions. A failure to read versions is only a warning.
  if (DynamicSyms) {
    Expected<SymbolicFile::basic_symbol_iterator_range> SymbolsOrErr =
        getDynamicSyms(Obj);
    if (!SymbolsOrErr) {
      error(SymbolsOrErr.takeError(), Obj.getFileName());
      return false;
    }
    Symbols = *SymbolsOrErr;
    if (const auto *E = dyn_cast<ELFObjectFileBase>(&Obj)) {
      if (Expected<std::vector<VersionEntry>> VersionsOrErr =
              E->readDynsymVersions())
        SymbolVersions = std::move(*VersionsOrErr);
      else
        WithColor::warning(errs(), ToolName)
            << "unable to read symbol versions: "
            << toString(VersionsOrErr.takeError()) << "\n";
    }
  }
  // If a "-s segname sectname" option was specified and this is a Mach-O
  // file get the section number for that section in this object file.
  unsigned int Nsect = 0;
  MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
  if (!SegSect.empty() && MachO) {
    Nsect = getNsectForSegSect(MachO);
    // If this section is not in the object file no symbols are printed.
    if (Nsect == 0)
      return false;
  }

  // The symbol table walk is skipped only for a Mach-O file when DyldInfoOnly
  // is set.
  if (!(MachO && DyldInfoOnly)) {
    // I is the position of the current symbol within Symbols. It starts at
    // -1 and is incremented first thing in the loop so that it also counts
    // the symbols skipped below; it is used to index SymbolVersions.
    size_t I = -1;
    for (BasicSymbolRef Sym : Symbols) {
      ++I;
      Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
      if (!SymFlagsOrErr) {
        error(SymFlagsOrErr.takeError(), Obj.getFileName());
        return false;
      }

      // Don't drop format specific symbols for ARM and AArch64 ELF targets,
      // they are used to represent mapping symbols and needed to honor the
      // --special-syms option.
      auto *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj);
      bool HasMappingSymbol =
          ELFObj && llvm::is_contained({ELF::EM_ARM, ELF::EM_AARCH64,
                                        ELF::EM_CSKY, ELF::EM_RISCV},
                                       ELFObj->getEMachine());
      if (!HasMappingSymbol && !DebugSyms &&
          (*SymFlagsOrErr & SymbolRef::SF_FormatSpecific))
        continue;
      if (WithoutAliases && (*SymFlagsOrErr & SymbolRef::SF_Indirect))
        continue;
      // If a "-s segname sectname" option was specified and this is a Mach-O
      // file and this section appears in this file, Nsect will be non-zero then
      // see if this symbol is a symbol from that section and if not skip it.
      if (Nsect && Nsect != getNsectInMachO(*MachO, Sym))
        continue;
      NMSymbol S = {};
      S.Size = 0;
      S.Address = 0;
      // The size is only filled in for ELF, XCOFF and (defined data) Wasm
      // symbols; it stays zero for every other format.
      if (isa<ELFObjectFileBase>(&Obj))
        S.Size = ELFSymbolRef(Sym).getSize();

      if (const XCOFFObjectFile *XCOFFObj =
              dyn_cast<const XCOFFObjectFile>(&Obj))
        S.Size = XCOFFObj->getSymbolSize(Sym.getRawDataRefImpl());

      if (const WasmObjectFile *WasmObj = dyn_cast<WasmObjectFile>(&Obj)) {
        const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym);
        if (WasmSym.isTypeData() && !WasmSym.isUndefined())
          S.Size = WasmSym.Info.DataRef.Size;
      }

      if (PrintAddress && isa<ObjectFile>(Obj)) {
        SymbolRef SymRef(Sym);
        Expected<uint64_t> AddressOrErr = SymRef.getAddress();
        if (!AddressOrErr) {
          // The error is dropped silently and the walk over the remaining
          // symbols stops here.
          consumeError(AddressOrErr.takeError());
          break;
        }
        S.Address = *AddressOrErr;
      }
      S.TypeName = getNMTypeName(Obj, Sym);
      S.TypeChar = getNMSectionTagAndName(Obj, Sym, S.SectionName);

      // Print the symbol name straight into S.Name. For Mach-O a failure is
      // replaced by a placeholder name; for other formats it is reported.
      raw_string_ostream OS(S.Name);
      if (Error E = Sym.printName(OS)) {
        if (MachO) {
          OS << "bad string index";
          consumeError(std::move(E));
        } else
          error(std::move(E), Obj.getFileName());
      }
      // Append "@version" or, for a version definition, "@@version" when a
      // version name is known for this symbol.
      if (!SymbolVersions.empty() && !SymbolVersions[I].Name.empty())
        S.Name +=
            (SymbolVersions[I].IsVerDef ? "@@" : "@") + SymbolVersions[I].Name;

      S.Sym = Sym;
      if (S.initializeFlags(Obj))
        SymbolList.push_back(S);
    }
  }

  // If this is a Mach-O file where the nlist symbol table is out of sync
  // with the dyld export trie then look through exports and fake up symbols
  // for the ones that are missing (also done with the -add-dyldinfo flag).
  // This is needed if strip(1) -T is run on a binary containing swift
  // language symbols for example.  The option -only-dyldinfo will fake up
  // all symbols from the dyld export trie as well as the bind info.
  if (MachO && !NoDyldInfo)
    dumpSymbolsFromDLInfoMachO(*MachO, SymbolList);

  return true;
}
1904 
printObjectLabel(bool PrintArchiveName,StringRef ArchiveName,StringRef ArchitectureName,StringRef ObjectFileName)1905 static void printObjectLabel(bool PrintArchiveName, StringRef ArchiveName,
1906                              StringRef ArchitectureName,
1907                              StringRef ObjectFileName) {
1908   outs() << "\n";
1909   if (ArchiveName.empty() || !PrintArchiveName)
1910     outs() << ObjectFileName;
1911   else
1912     outs() << ArchiveName << "(" << ObjectFileName << ")";
1913   if (!ArchitectureName.empty())
1914     outs() << " (for architecture " << ArchitectureName << ")";
1915   outs() << ":\n";
1916 }
1917 
hasSymbols(SymbolicFile & Obj)1918 static Expected<bool> hasSymbols(SymbolicFile &Obj) {
1919   if (DynamicSyms) {
1920     Expected<SymbolicFile::basic_symbol_iterator_range> DynamicSymsOrErr =
1921         getDynamicSyms(Obj);
1922     if (!DynamicSymsOrErr)
1923       return DynamicSymsOrErr.takeError();
1924     return !DynamicSymsOrErr->empty();
1925   }
1926   return !Obj.symbols().empty();
1927 }
1928 
printSymbolNamesFromObject(SymbolicFile & Obj,std::vector<NMSymbol> & SymbolList,bool PrintSymbolObject,bool PrintObjectLabel,StringRef ArchiveName={},StringRef ArchitectureName={},StringRef ObjectName={},bool PrintArchiveName=true)1929 static void printSymbolNamesFromObject(
1930     SymbolicFile &Obj, std::vector<NMSymbol> &SymbolList,
1931     bool PrintSymbolObject, bool PrintObjectLabel, StringRef ArchiveName = {},
1932     StringRef ArchitectureName = {}, StringRef ObjectName = {},
1933     bool PrintArchiveName = true) {
1934 
1935   if (PrintObjectLabel && !ExportSymbols)
1936     printObjectLabel(PrintArchiveName, ArchiveName, ArchitectureName,
1937                      ObjectName.empty() ? Obj.getFileName() : ObjectName);
1938 
1939   if (!getSymbolNamesFromObject(Obj, SymbolList) || ExportSymbols)
1940     return;
1941 
1942   // If there is an error in hasSymbols(), the error should be encountered in
1943   // function getSymbolNamesFromObject first.
1944   if (!cantFail(hasSymbols(Obj)) && SymbolList.empty() && !Quiet) {
1945     writeFileName(errs(), ArchiveName, ArchitectureName);
1946     errs() << "no symbols\n";
1947   }
1948 
1949   sortSymbolList(SymbolList);
1950   printSymbolList(Obj, SymbolList, PrintSymbolObject, ArchiveName,
1951                   ArchitectureName);
1952 }
1953 
dumpSymbolsNameFromMachOFilesetEntry(MachOObjectFile * Obj,std::vector<NMSymbol> & SymbolList,bool PrintSymbolObject,bool PrintObjectLabel)1954 static void dumpSymbolsNameFromMachOFilesetEntry(
1955     MachOObjectFile *Obj, std::vector<NMSymbol> &SymbolList,
1956     bool PrintSymbolObject, bool PrintObjectLabel) {
1957   auto Buf = Obj->getMemoryBufferRef();
1958   const auto *End = Obj->load_commands().end();
1959   for (const auto *It = Obj->load_commands().begin(); It != End; ++It) {
1960     const auto &Command = *It;
1961     if (Command.C.cmd != MachO::LC_FILESET_ENTRY)
1962       continue;
1963 
1964     MachO::fileset_entry_command Entry =
1965         Obj->getFilesetEntryLoadCommand(Command);
1966     auto MaybeMachO =
1967         MachOObjectFile::createMachOObjectFile(Buf, 0, 0, Entry.fileoff);
1968 
1969     if (Error Err = MaybeMachO.takeError())
1970       report_fatal_error(std::move(Err));
1971 
1972     const char *EntryName = Command.Ptr + Entry.entry_id.offset;
1973     if (EntryName)
1974       outs() << "Symbols for " << EntryName << ": \n";
1975 
1976     std::unique_ptr<MachOObjectFile> EntryMachO = std::move(MaybeMachO.get());
1977     printSymbolNamesFromObject(*EntryMachO, SymbolList, PrintSymbolObject,
1978                                PrintObjectLabel);
1979 
1980     if (std::next(It) != End)
1981       outs() << "\n";
1982   }
1983 }
1984 
dumpSymbolNamesFromObject(SymbolicFile & Obj,std::vector<NMSymbol> & SymbolList,bool PrintSymbolObject,bool PrintObjectLabel,StringRef ArchiveName={},StringRef ArchitectureName={},StringRef ObjectName={},bool PrintArchiveName=true)1985 static void dumpSymbolNamesFromObject(
1986     SymbolicFile &Obj, std::vector<NMSymbol> &SymbolList,
1987     bool PrintSymbolObject, bool PrintObjectLabel, StringRef ArchiveName = {},
1988     StringRef ArchitectureName = {}, StringRef ObjectName = {},
1989     bool PrintArchiveName = true) {
1990   if (!shouldDump(Obj))
1991     return;
1992 
1993   if (ExportSymbols && Obj.isXCOFF()) {
1994     XCOFFObjectFile *XCOFFObj = cast<XCOFFObjectFile>(&Obj);
1995     getXCOFFExports(XCOFFObj, SymbolList, ArchiveName);
1996     return;
1997   }
1998 
1999   CurrentFilename = Obj.getFileName();
2000 
2001   // Are we handling a MachO of type MH_FILESET?
2002   if (Obj.isMachO() && Obj.is64Bit() &&
2003       cast<MachOObjectFile>(&Obj)->getHeader64().filetype ==
2004           MachO::MH_FILESET) {
2005     dumpSymbolsNameFromMachOFilesetEntry(cast<MachOObjectFile>(&Obj),
2006                                          SymbolList, PrintSymbolObject,
2007                                          PrintObjectLabel);
2008     return;
2009   }
2010 
2011   printSymbolNamesFromObject(Obj, SymbolList, PrintSymbolObject,
2012                              PrintObjectLabel, ArchiveName, ArchitectureName,
2013                              ObjectName, PrintArchiveName);
2014 }
2015 
2016 // checkMachOAndArchFlags() checks to see if the SymbolicFile is a Mach-O file
2017 // and if it is and there is a list of architecture flags is specified then
2018 // check to make sure this Mach-O file is one of those architectures or all
2019 // architectures was specificed.  If not then an error is generated and this
2020 // routine returns false.  Else it returns true.
checkMachOAndArchFlags(SymbolicFile * O,StringRef Filename)2021 static bool checkMachOAndArchFlags(SymbolicFile *O, StringRef Filename) {
2022   auto *MachO = dyn_cast<MachOObjectFile>(O);
2023 
2024   if (!MachO || ArchAll || ArchFlags.empty())
2025     return true;
2026 
2027   MachO::mach_header H;
2028   MachO::mach_header_64 H_64;
2029   Triple T;
2030   const char *McpuDefault, *ArchFlag;
2031   if (MachO->is64Bit()) {
2032     H_64 = MachO->MachOObjectFile::getHeader64();
2033     T = MachOObjectFile::getArchTriple(H_64.cputype, H_64.cpusubtype,
2034                                        &McpuDefault, &ArchFlag);
2035   } else {
2036     H = MachO->MachOObjectFile::getHeader();
2037     T = MachOObjectFile::getArchTriple(H.cputype, H.cpusubtype,
2038                                        &McpuDefault, &ArchFlag);
2039   }
2040   const std::string ArchFlagName(ArchFlag);
2041   if (!llvm::is_contained(ArchFlags, ArchFlagName)) {
2042     error("No architecture specified", Filename);
2043     return false;
2044   }
2045   return true;
2046 }
2047 
printArchiveMap(iterator_range<Archive::symbol_iterator> & map,StringRef Filename)2048 static void printArchiveMap(iterator_range<Archive::symbol_iterator> &map,
2049                             StringRef Filename) {
2050   for (auto I : map) {
2051     Expected<Archive::Child> C = I.getMember();
2052     if (!C) {
2053       error(C.takeError(), Filename);
2054       break;
2055     }
2056     Expected<StringRef> FileNameOrErr = C->getName();
2057     if (!FileNameOrErr) {
2058       error(FileNameOrErr.takeError(), Filename);
2059       break;
2060     }
2061     StringRef SymName = I.getName();
2062     outs() << SymName << " in " << FileNameOrErr.get() << "\n";
2063   }
2064 
2065   outs() << "\n";
2066 }
2067 
dumpArchiveMap(Archive * A,StringRef Filename)2068 static void dumpArchiveMap(Archive *A, StringRef Filename) {
2069   auto Map = A->symbols();
2070   if (!Map.empty()) {
2071     outs() << "Archive map\n";
2072     printArchiveMap(Map, Filename);
2073   }
2074 
2075   auto ECMap = A->ec_symbols();
2076   if (!ECMap) {
2077     warn(ECMap.takeError(), Filename);
2078   } else if (!ECMap->empty()) {
2079     outs() << "Archive EC map\n";
2080     printArchiveMap(*ECMap, Filename);
2081   }
2082 }
2083 
dumpArchive(Archive * A,std::vector<NMSymbol> & SymbolList,StringRef Filename,LLVMContext * ContextPtr)2084 static void dumpArchive(Archive *A, std::vector<NMSymbol> &SymbolList,
2085                         StringRef Filename, LLVMContext *ContextPtr) {
2086   if (ArchiveMap)
2087     dumpArchiveMap(A, Filename);
2088 
2089   Error Err = Error::success();
2090   for (auto &C : A->children(Err)) {
2091     Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(ContextPtr);
2092     if (!ChildOrErr) {
2093       if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
2094         error(std::move(E), Filename, C);
2095       continue;
2096     }
2097     if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
2098       if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
2099         WithColor::warning(errs(), ToolName)
2100             << "sizes with -print-size for Mach-O files are always zero.\n";
2101         MachOPrintSizeWarning = true;
2102       }
2103       if (!checkMachOAndArchFlags(O, Filename))
2104         return;
2105       dumpSymbolNamesFromObject(*O, SymbolList, /*PrintSymbolObject=*/false,
2106                                 !PrintFileName, Filename,
2107                                 /*ArchitectureName=*/{}, O->getFileName(),
2108                                 /*PrintArchiveName=*/false);
2109     }
2110   }
2111   if (Err)
2112     error(std::move(Err), A->getFileName());
2113 }
2114 
// Dumps, for each requested architecture flag in turn, every slice of the
// universal binary UB whose architecture flag name matches it. A matching
// slice may be a single object file or an archive, in which case each member
// that is a symbolic file is dumped. If some requested architecture has no
// matching slice, an error is reported and the remaining flags are not
// processed.
static void dumpMachOUniversalBinaryMatchArchFlags(
    MachOUniversalBinary *UB, std::vector<NMSymbol> &SymbolList,
    StringRef Filename, LLVMContext *ContextPtr) {
  // Look for a slice in the universal binary that matches each ArchFlag.
  bool ArchFound;
  for (unsigned i = 0; i < ArchFlags.size(); ++i) {
    ArchFound = false;
    for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
                                               E = UB->end_objects();
         I != E; ++I) {
      if (ArchFlags[i] == I->getArchFlagName()) {
        ArchFound = true;
        Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
        std::string ArchiveName;
        std::string ArchitectureName;
        ArchiveName.clear();
        ArchitectureName.clear();
        if (ObjOrErr) {
          // The slice is a plain object file. The architecture name and the
          // object label are only shown when more than one architecture was
          // requested.
          ObjectFile &Obj = *ObjOrErr.get();
          if (ArchFlags.size() > 1)
            ArchitectureName = I->getArchFlagName();
          dumpSymbolNamesFromObject(Obj, SymbolList,
                                    /*PrintSymbolObject=*/false,
                                    (ArchFlags.size() > 1) && !PrintFileName,
                                    ArchiveName, ArchitectureName);
        } else if (auto E =
                       isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
          // Report the error and move on to the next slice.
          error(std::move(E), Filename,
                ArchFlags.size() > 1 ? StringRef(I->getArchFlagName())
                                     : StringRef());
          continue;
        } else if (Expected<std::unique_ptr<Archive>> AOrErr =
                       I->getAsArchive()) {
          // The slice is an archive: dump each member that is a symbolic
          // file.
          std::unique_ptr<Archive> &A = *AOrErr;
          Error Err = Error::success();
          for (auto &C : A->children(Err)) {
            Expected<std::unique_ptr<Binary>> ChildOrErr =
                C.getAsBinary(ContextPtr);
            if (!ChildOrErr) {
              if (auto E =
                      isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
                error(std::move(E), Filename, C,
                      ArchFlags.size() > 1 ? StringRef(I->getArchFlagName())
                                           : StringRef());
              }
              continue;
            }
            if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
              ArchiveName = std::string(A->getFileName());
              if (ArchFlags.size() > 1)
                ArchitectureName = I->getArchFlagName();
              dumpSymbolNamesFromObject(
                  *O, SymbolList, /*PrintSymbolObject=*/false, !PrintFileName,
                  ArchiveName, ArchitectureName);
            }
          }
          // Report an error hit while advancing through the archive members.
          if (Err)
            error(std::move(Err), A->getFileName());
        } else {
          // The slice is neither an object file nor an archive.
          consumeError(AOrErr.takeError());
          error(Filename + " for architecture " +
                    StringRef(I->getArchFlagName()) +
                    " is not a Mach-O file or an archive file",
                "Mach-O universal file");
        }
      }
    }
    if (!ArchFound) {
      error(ArchFlags[i],
            "file: " + Filename + " does not contain architecture");
      return;
    }
  }
}
2189 
2190 // Returns true If the binary contains a slice that matches the host
2191 // architecture, or false otherwise.
dumpMachOUniversalBinaryMatchHost(MachOUniversalBinary * UB,std::vector<NMSymbol> & SymbolList,StringRef Filename,LLVMContext * ContextPtr)2192 static bool dumpMachOUniversalBinaryMatchHost(MachOUniversalBinary *UB,
2193                                               std::vector<NMSymbol> &SymbolList,
2194                                               StringRef Filename,
2195                                               LLVMContext *ContextPtr) {
2196   Triple HostTriple = MachOObjectFile::getHostArch();
2197   StringRef HostArchName = HostTriple.getArchName();
2198   for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
2199                                              E = UB->end_objects();
2200        I != E; ++I) {
2201     if (HostArchName == I->getArchFlagName()) {
2202       Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
2203       std::string ArchiveName;
2204       if (ObjOrErr) {
2205         ObjectFile &Obj = *ObjOrErr.get();
2206         dumpSymbolNamesFromObject(Obj, SymbolList, /*PrintSymbolObject=*/false,
2207                                   /*PrintObjectLabel=*/false);
2208       } else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError()))
2209         error(std::move(E), Filename);
2210       else if (Expected<std::unique_ptr<Archive>> AOrErr = I->getAsArchive()) {
2211         std::unique_ptr<Archive> &A = *AOrErr;
2212         Error Err = Error::success();
2213         for (auto &C : A->children(Err)) {
2214           Expected<std::unique_ptr<Binary>> ChildOrErr =
2215               C.getAsBinary(ContextPtr);
2216           if (!ChildOrErr) {
2217             if (auto E =
2218                     isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
2219               error(std::move(E), Filename, C);
2220             continue;
2221           }
2222           if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
2223             ArchiveName = std::string(A->getFileName());
2224             dumpSymbolNamesFromObject(*O, SymbolList,
2225                                       /*PrintSymbolObject=*/false,
2226                                       !PrintFileName, ArchiveName);
2227           }
2228         }
2229         if (Err)
2230           error(std::move(Err), A->getFileName());
2231       } else {
2232         consumeError(AOrErr.takeError());
2233         error(Filename + " for architecture " +
2234                   StringRef(I->getArchFlagName()) +
2235                   " is not a Mach-O file or an archive file",
2236               "Mach-O universal file");
2237       }
2238       return true;
2239     }
2240   }
2241   return false;
2242 }
2243 
dumpMachOUniversalBinaryArchAll(MachOUniversalBinary * UB,std::vector<NMSymbol> & SymbolList,StringRef Filename,LLVMContext * ContextPtr)2244 static void dumpMachOUniversalBinaryArchAll(MachOUniversalBinary *UB,
2245                                             std::vector<NMSymbol> &SymbolList,
2246                                             StringRef Filename,
2247                                             LLVMContext *ContextPtr) {
2248   bool moreThanOneArch = UB->getNumberOfObjects() > 1;
2249   for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) {
2250     Expected<std::unique_ptr<ObjectFile>> ObjOrErr = O.getAsObjectFile();
2251     std::string ArchiveName;
2252     std::string ArchitectureName;
2253     ArchiveName.clear();
2254     ArchitectureName.clear();
2255     if (ObjOrErr) {
2256       ObjectFile &Obj = *ObjOrErr.get();
2257       if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
2258         ArchitectureName = O.getArchFlagName();
2259       dumpSymbolNamesFromObject(Obj, SymbolList, /*PrintSymbolObject=*/false,
2260                                 !PrintFileName, ArchiveName, ArchitectureName);
2261     } else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
2262       error(std::move(E), Filename,
2263             moreThanOneArch ? StringRef(O.getArchFlagName()) : StringRef());
2264       continue;
2265     } else if (Expected<std::unique_ptr<Archive>> AOrErr = O.getAsArchive()) {
2266       std::unique_ptr<Archive> &A = *AOrErr;
2267       Error Err = Error::success();
2268       for (auto &C : A->children(Err)) {
2269         Expected<std::unique_ptr<Binary>> ChildOrErr =
2270             C.getAsBinary(ContextPtr);
2271         if (!ChildOrErr) {
2272           if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
2273             error(std::move(E), Filename, C,
2274                   moreThanOneArch ? StringRef(ArchitectureName) : StringRef());
2275           continue;
2276         }
2277         if (SymbolicFile *F = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
2278           ArchiveName = std::string(A->getFileName());
2279           if (isa<MachOObjectFile>(F) && moreThanOneArch)
2280             ArchitectureName = O.getArchFlagName();
2281           dumpSymbolNamesFromObject(*F, SymbolList, /*PrintSymbolObject=*/false,
2282                                     !PrintFileName, ArchiveName,
2283                                     ArchitectureName);
2284         }
2285       }
2286       if (Err)
2287         error(std::move(Err), A->getFileName());
2288     } else {
2289       consumeError(AOrErr.takeError());
2290       error(Filename + " for architecture " + StringRef(O.getArchFlagName()) +
2291                 " is not a Mach-O file or an archive file",
2292             "Mach-O universal file");
2293     }
2294   }
2295 }
2296 
dumpMachOUniversalBinary(MachOUniversalBinary * UB,std::vector<NMSymbol> & SymbolList,StringRef Filename,LLVMContext * ContextPtr)2297 static void dumpMachOUniversalBinary(MachOUniversalBinary *UB,
2298                                      std::vector<NMSymbol> &SymbolList,
2299                                      StringRef Filename,
2300                                      LLVMContext *ContextPtr) {
2301   // If we have a list of architecture flags specified dump only those.
2302   if (!ArchAll && !ArchFlags.empty()) {
2303     dumpMachOUniversalBinaryMatchArchFlags(UB, SymbolList, Filename,
2304                                            ContextPtr);
2305     return;
2306   }
2307 
2308   // No architecture flags were specified so if this contains a slice that
2309   // matches the host architecture dump only that.
2310   if (!ArchAll &&
2311       dumpMachOUniversalBinaryMatchHost(UB, SymbolList, Filename, ContextPtr))
2312     return;
2313 
2314   // Either all architectures have been specified or none have been specified
2315   // and this does not contain the host architecture so dump all the slices.
2316   dumpMachOUniversalBinaryArchAll(UB, SymbolList, Filename, ContextPtr);
2317 }
2318 
dumpTapiUniversal(TapiUniversal * TU,std::vector<NMSymbol> & SymbolList,StringRef Filename)2319 static void dumpTapiUniversal(TapiUniversal *TU,
2320                               std::vector<NMSymbol> &SymbolList,
2321                               StringRef Filename) {
2322   for (const TapiUniversal::ObjectForArch &I : TU->objects()) {
2323     StringRef ArchName = I.getArchFlagName();
2324     const bool ShowArch =
2325         ArchFlags.empty() || llvm::is_contained(ArchFlags, ArchName);
2326     if (!ShowArch)
2327       continue;
2328     if (!AddInlinedInfo && !I.isTopLevelLib())
2329       continue;
2330     if (auto ObjOrErr = I.getAsObjectFile())
2331       dumpSymbolNamesFromObject(
2332           *ObjOrErr.get(), SymbolList, /*PrintSymbolObject=*/false,
2333           /*PrintObjectLabel=*/true,
2334           /*ArchiveName=*/{}, ArchName, I.getInstallName());
2335     else if (Error E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
2336       error(std::move(E), Filename, ArchName);
2337     }
2338   }
2339 }
2340 
dumpSymbolicFile(SymbolicFile * O,std::vector<NMSymbol> & SymbolList,StringRef Filename)2341 static void dumpSymbolicFile(SymbolicFile *O, std::vector<NMSymbol> &SymbolList,
2342                              StringRef Filename) {
2343   if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
2344     WithColor::warning(errs(), ToolName)
2345         << "sizes with --print-size for Mach-O files are always zero.\n";
2346     MachOPrintSizeWarning = true;
2347   }
2348   if (!checkMachOAndArchFlags(O, Filename))
2349     return;
2350   dumpSymbolNamesFromObject(*O, SymbolList, /*PrintSymbolObject=*/true,
2351                             /*PrintObjectLabel=*/false);
2352 }
2353 
dumpSymbolNamesFromFile(StringRef Filename)2354 static std::vector<NMSymbol> dumpSymbolNamesFromFile(StringRef Filename) {
2355   std::vector<NMSymbol> SymbolList;
2356   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
2357       MemoryBuffer::getFileOrSTDIN(Filename);
2358   if (error(BufferOrErr.getError(), Filename))
2359     return SymbolList;
2360 
2361   // Ignore AIX linker import files (these files start with "#!"), when
2362   // exporting symbols.
2363   const char *BuffStart = (*BufferOrErr)->getBufferStart();
2364   size_t BufferSize = (*BufferOrErr)->getBufferSize();
2365   if (ExportSymbols && BufferSize >= 2 && BuffStart[0] == '#' &&
2366       BuffStart[1] == '!')
2367     return SymbolList;
2368 
2369   LLVMContext Context;
2370   LLVMContext *ContextPtr = NoLLVMBitcode ? nullptr : &Context;
2371   Expected<std::unique_ptr<Binary>> BinaryOrErr =
2372       createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr);
2373   if (!BinaryOrErr) {
2374     error(BinaryOrErr.takeError(), Filename);
2375     return SymbolList;
2376   }
2377   Binary &Bin = *BinaryOrErr.get();
2378   if (Archive *A = dyn_cast<Archive>(&Bin))
2379     dumpArchive(A, SymbolList, Filename, ContextPtr);
2380   else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin))
2381     dumpMachOUniversalBinary(UB, SymbolList, Filename, ContextPtr);
2382   else if (TapiUniversal *TU = dyn_cast<TapiUniversal>(&Bin))
2383     dumpTapiUniversal(TU, SymbolList, Filename);
2384   else if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin))
2385     dumpSymbolicFile(O, SymbolList, Filename);
2386   return SymbolList;
2387 }
2388 
2389 static void
exportSymbolNamesFromFiles(const std::vector<std::string> & InputFilenames)2390 exportSymbolNamesFromFiles(const std::vector<std::string> &InputFilenames) {
2391   std::vector<NMSymbol> SymbolList;
2392   for (const auto &FileName : InputFilenames) {
2393     std::vector<NMSymbol> FileSymList = dumpSymbolNamesFromFile(FileName);
2394     SymbolList.insert(SymbolList.end(), FileSymList.begin(), FileSymList.end());
2395   }
2396 
2397   // Delete symbols which should not be printed from SymolList.
2398   llvm::erase_if(SymbolList,
2399                  [](const NMSymbol &s) { return !s.shouldPrint(); });
2400   sortSymbolList(SymbolList);
2401   SymbolList.erase(std::unique(SymbolList.begin(), SymbolList.end()),
2402                    SymbolList.end());
2403   printExportSymbolList(SymbolList);
2404 }
2405 
llvm_nm_main(int argc,char ** argv,const llvm::ToolContext &)2406 int llvm_nm_main(int argc, char **argv, const llvm::ToolContext &) {
2407   BumpPtrAllocator A;
2408   StringSaver Saver(A);
2409   NmOptTable Tbl;
2410   ToolName = argv[0];
2411   opt::InputArgList Args =
2412       Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
2413         error(Msg);
2414         exit(1);
2415       });
2416   if (Args.hasArg(OPT_help)) {
2417     Tbl.printHelp(
2418         outs(),
2419         (Twine(ToolName) + " [options] <input object files>").str().c_str(),
2420         "LLVM symbol table dumper");
2421     // TODO Replace this with OptTable API once it adds extrahelp support.
2422     outs() << "\nPass @FILE as argument to read options from FILE.\n";
2423     return 0;
2424   }
2425   if (Args.hasArg(OPT_version)) {
2426     // This needs to contain the word "GNU", libtool looks for that string.
2427     outs() << "llvm-nm, compatible with GNU nm" << '\n';
2428     cl::PrintVersionMessage();
2429     return 0;
2430   }
2431 
2432   DebugSyms = Args.hasArg(OPT_debug_syms);
2433   DefinedOnly = Args.hasArg(OPT_defined_only);
2434   Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, false);
2435   DynamicSyms = Args.hasArg(OPT_dynamic);
2436   ExternalOnly = Args.hasArg(OPT_extern_only);
2437   StringRef V = Args.getLastArgValue(OPT_format_EQ, "bsd");
2438   if (V == "bsd")
2439     OutputFormat = bsd;
2440   else if (V == "posix")
2441     OutputFormat = posix;
2442   else if (V == "sysv")
2443     OutputFormat = sysv;
2444   else if (V == "darwin")
2445     OutputFormat = darwin;
2446   else if (V == "just-symbols")
2447     OutputFormat = just_symbols;
2448   else
2449     error("--format value should be one of: bsd, posix, sysv, darwin, "
2450           "just-symbols");
2451   LineNumbers = Args.hasArg(OPT_line_numbers);
2452   NoLLVMBitcode = Args.hasArg(OPT_no_llvm_bc);
2453   NoSort = Args.hasArg(OPT_no_sort);
2454   NoWeakSymbols = Args.hasArg(OPT_no_weak);
2455   NumericSort = Args.hasArg(OPT_numeric_sort);
2456   ArchiveMap = Args.hasArg(OPT_print_armap);
2457   PrintFileName = Args.hasArg(OPT_print_file_name);
2458   PrintSize = Args.hasArg(OPT_print_size);
2459   ReverseSort = Args.hasArg(OPT_reverse_sort);
2460   ExportSymbols = Args.hasArg(OPT_export_symbols);
2461   if (ExportSymbols) {
2462     ExternalOnly = true;
2463     DefinedOnly = true;
2464   }
2465 
2466   Quiet = Args.hasArg(OPT_quiet);
2467   V = Args.getLastArgValue(OPT_radix_EQ, "x");
2468   if (V == "o")
2469     AddressRadix = Radix::o;
2470   else if (V == "d")
2471     AddressRadix = Radix::d;
2472   else if (V == "x")
2473     AddressRadix = Radix::x;
2474   else
2475     error("--radix value should be one of: 'o' (octal), 'd' (decimal), 'x' "
2476           "(hexadecimal)");
2477   SizeSort = Args.hasArg(OPT_size_sort);
2478   SpecialSyms = Args.hasArg(OPT_special_syms);
2479   UndefinedOnly = Args.hasArg(OPT_undefined_only);
2480   WithoutAliases = Args.hasArg(OPT_without_aliases);
2481 
2482   // Get BitMode from enviornment variable "OBJECT_MODE" for AIX OS, if
2483   // specified.
2484   Triple HostTriple(sys::getProcessTriple());
2485   if (HostTriple.isOSAIX()) {
2486     BitMode = StringSwitch<BitModeTy>(getenv("OBJECT_MODE"))
2487                   .Case("32", BitModeTy::Bit32)
2488                   .Case("64", BitModeTy::Bit64)
2489                   .Case("32_64", BitModeTy::Bit32_64)
2490                   .Case("any", BitModeTy::Any)
2491                   .Default(BitModeTy::Bit32);
2492   } else
2493     BitMode = BitModeTy::Any;
2494 
2495   if (Arg *A = Args.getLastArg(OPT_X)) {
2496     StringRef Mode = A->getValue();
2497     if (Mode == "32")
2498       BitMode = BitModeTy::Bit32;
2499     else if (Mode == "64")
2500       BitMode = BitModeTy::Bit64;
2501     else if (Mode == "32_64")
2502       BitMode = BitModeTy::Bit32_64;
2503     else if (Mode == "any")
2504       BitMode = BitModeTy::Any;
2505     else
2506       error("-X value should be one of: 32, 64, 32_64, (default) any");
2507   }
2508 
2509   // Mach-O specific options.
2510   FormatMachOasHex = Args.hasArg(OPT_x);
2511   AddDyldInfo = Args.hasArg(OPT_add_dyldinfo);
2512   AddInlinedInfo = Args.hasArg(OPT_add_inlinedinfo);
2513   DyldInfoOnly = Args.hasArg(OPT_dyldinfo_only);
2514   NoDyldInfo = Args.hasArg(OPT_no_dyldinfo);
2515 
2516   // XCOFF specific options.
2517   NoRsrc = Args.hasArg(OPT_no_rsrc);
2518 
2519   // llvm-nm only reads binary files.
2520   if (error(sys::ChangeStdinToBinary()))
2521     return 1;
2522 
2523   // These calls are needed so that we can read bitcode correctly.
2524   llvm::InitializeAllTargetInfos();
2525   llvm::InitializeAllTargetMCs();
2526   llvm::InitializeAllAsmParsers();
2527 
2528   // The relative order of these is important. If you pass --size-sort it should
2529   // only print out the size. However, if you pass -S --size-sort, it should
2530   // print out both the size and address.
2531   if (SizeSort && !PrintSize)
2532     PrintAddress = false;
2533   if (OutputFormat == sysv || SizeSort)
2534     PrintSize = true;
2535 
2536   for (const auto *A : Args.filtered(OPT_arch_EQ)) {
2537     SmallVector<StringRef, 2> Values;
2538     llvm::SplitString(A->getValue(), Values, ",");
2539     for (StringRef V : Values) {
2540       if (V == "all")
2541         ArchAll = true;
2542       else if (MachOObjectFile::isValidArch(V))
2543         ArchFlags.push_back(V);
2544       else
2545         error("Unknown architecture named '" + V + "'",
2546               "for the --arch option");
2547     }
2548   }
2549 
2550   // Mach-O takes -s to accept two arguments. We emulate this by iterating over
2551   // both OPT_s and OPT_INPUT.
2552   std::vector<std::string> InputFilenames;
2553   int SegSectArgs = 0;
2554   for (opt::Arg *A : Args.filtered(OPT_s, OPT_INPUT)) {
2555     if (SegSectArgs > 0) {
2556       --SegSectArgs;
2557       SegSect.push_back(A->getValue());
2558     } else if (A->getOption().matches(OPT_s)) {
2559       SegSectArgs = 2;
2560     } else {
2561       InputFilenames.push_back(A->getValue());
2562     }
2563   }
2564   if (!SegSect.empty() && SegSect.size() != 2)
2565     error("bad number of arguments (must be two arguments)",
2566           "for the -s option");
2567 
2568   if (InputFilenames.empty())
2569     InputFilenames.push_back("a.out");
2570   if (InputFilenames.size() > 1)
2571     MultipleFiles = true;
2572 
2573   if (NoDyldInfo && (AddDyldInfo || DyldInfoOnly))
2574     error("--no-dyldinfo can't be used with --add-dyldinfo or --dyldinfo-only");
2575 
2576   if (ExportSymbols)
2577     exportSymbolNamesFromFiles(InputFilenames);
2578   else
2579     llvm::for_each(InputFilenames, dumpSymbolNamesFromFile);
2580 
2581   if (HadError)
2582     return 1;
2583   return 0;
2584 }
2585