1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "obj2yaml.h"
10 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
11 #include "llvm/Object/MachOUniversal.h"
12 #include "llvm/ObjectYAML/DWARFYAML.h"
13 #include "llvm/ObjectYAML/ObjectYAML.h"
14 #include "llvm/Support/Error.h"
15 #include "llvm/Support/ErrorHandling.h"
16 #include "llvm/Support/LEB128.h"
17 
18 #include <string.h> // for memcpy
19 
20 using namespace llvm;
21 
22 class MachODumper {
23 
24   template <typename StructType>
25   Expected<const char *> processLoadCommandData(
26       MachOYAML::LoadCommand &LC,
27       const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
28       MachOYAML::Object &Y);
29 
30   const object::MachOObjectFile &Obj;
31   std::unique_ptr<DWARFContext> DWARFCtx;
32   void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
33   Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
34   void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
35   void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
36   void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
37                        ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
38   void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
39   void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
40 
41   template <typename SectionType>
42   Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
43                                                       size_t SecIndex);
44   template <typename SectionType>
45   Expected<MachOYAML::Section> constructSection(SectionType Sec,
46                                                 size_t SecIndex);
47   template <typename SectionType, typename SegmentType>
48   Expected<const char *>
49   extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
50                   std::vector<MachOYAML::Section> &Sections,
51                   MachOYAML::Object &Y);
52 
53 public:
MachODumper(const object::MachOObjectFile & O,std::unique_ptr<DWARFContext> DCtx)54   MachODumper(const object::MachOObjectFile &O,
55               std::unique_ptr<DWARFContext> DCtx)
56       : Obj(O), DWARFCtx(std::move(DCtx)) {}
57   Expected<std::unique_ptr<MachOYAML::Object>> dump();
58 };
59 
// Emits one `case` of the load-command switch in dumpLoadCommands. The case
// copies the fixed-size command struct into the matching LC.Data member,
// byte-swaps it when the object's endianness differs from the host's, and then
// lets processLoadCommandData consume the command-specific payload. The
// expansion relies on LC, LoadCmd, Y, EndPtr and Obj being in scope, and
// returns the payload error from the enclosing function on failure.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName: {                                                        \
    memcpy(static_cast<void *>(&(LC.Data.LCStruct##_data)), LoadCmd.Ptr,       \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    Expected<const char *> ExpectedEndPtr =                                    \
        processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
    if (!ExpectedEndPtr)                                                       \
      return ExpectedEndPtr.takeError();                                       \
    EndPtr = *ExpectedEndPtr;                                                  \
    break;                                                                     \
  }
72 
73 template <typename SectionType>
74 Expected<MachOYAML::Section>
constructSectionCommon(SectionType Sec,size_t SecIndex)75 MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
76   MachOYAML::Section TempSec;
77   memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
78   memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
79   TempSec.addr = Sec.addr;
80   TempSec.size = Sec.size;
81   TempSec.offset = Sec.offset;
82   TempSec.align = Sec.align;
83   TempSec.reloff = Sec.reloff;
84   TempSec.nreloc = Sec.nreloc;
85   TempSec.flags = Sec.flags;
86   TempSec.reserved1 = Sec.reserved1;
87   TempSec.reserved2 = Sec.reserved2;
88   TempSec.reserved3 = 0;
89   if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
90     TempSec.content =
91         yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
92 
93   if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) {
94     TempSec.relocations.reserve(TempSec.nreloc);
95     for (const object::RelocationRef &Reloc : SecRef->relocations()) {
96       const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
97       const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
98       MachOYAML::Relocation R;
99       R.address = Obj.getAnyRelocationAddress(RE);
100       R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
101       R.length = Obj.getAnyRelocationLength(RE);
102       R.type = Obj.getAnyRelocationType(RE);
103       R.is_scattered = Obj.isRelocationScattered(RE);
104       R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
105       R.is_extern =
106           (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
107       R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
108       TempSec.relocations.push_back(R);
109     }
110   } else {
111     return SecRef.takeError();
112   }
113   return TempSec;
114 }
115 
116 template <>
constructSection(MachO::section Sec,size_t SecIndex)117 Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
118                                                            size_t SecIndex) {
119   Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
120   if (TempSec)
121     TempSec->reserved3 = 0;
122   return TempSec;
123 }
124 
125 template <>
126 Expected<MachOYAML::Section>
constructSection(MachO::section_64 Sec,size_t SecIndex)127 MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
128   Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
129   if (TempSec)
130     TempSec->reserved3 = Sec.reserved3;
131   return TempSec;
132 }
133 
dumpDebugSection(StringRef SecName,DWARFContext & DCtx,DWARFYAML::Data & DWARF)134 static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
135                               DWARFYAML::Data &DWARF) {
136   if (SecName == "__debug_abbrev") {
137     dumpDebugAbbrev(DCtx, DWARF);
138     return Error::success();
139   }
140   if (SecName == "__debug_aranges")
141     return dumpDebugARanges(DCtx, DWARF);
142   if (SecName == "__debug_info") {
143     dumpDebugInfo(DCtx, DWARF);
144     return Error::success();
145   }
146   if (SecName == "__debug_line") {
147     dumpDebugLines(DCtx, DWARF);
148     return Error::success();
149   }
150   if (SecName.startswith("__debug_pub")) {
151     // FIXME: We should extract pub-section dumpers from this function.
152     dumpDebugPubSections(DCtx, DWARF);
153     return Error::success();
154   }
155   if (SecName == "__debug_ranges")
156     return dumpDebugRanges(DCtx, DWARF);
157   if (SecName == "__debug_str")
158     return dumpDebugStrings(DCtx, DWARF);
159   return createStringError(errc::not_supported,
160                            "dumping " + SecName + " section is not supported");
161 }
162 
163 template <typename SectionType, typename SegmentType>
extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,std::vector<MachOYAML::Section> & Sections,MachOYAML::Object & Y)164 Expected<const char *> MachODumper::extractSections(
165     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
166     std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
167   auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
168   const SectionType *Curr =
169       reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
170   for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
171     SectionType Sec;
172     memcpy((void *)&Sec, Curr, sizeof(SectionType));
173     if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
174       MachO::swapStruct(Sec);
175     // For MachO section indices start from 1.
176     if (Expected<MachOYAML::Section> S =
177             constructSection(Sec, Sections.size() + 1)) {
178       StringRef SecName(S->sectname);
179       if (SecName.startswith("__debug_")) {
180         // If the DWARF section cannot be successfully parsed, emit raw content
181         // instead of an entry in the DWARF section of the YAML.
182         if (Error Err = dumpDebugSection(SecName, *DWARFCtx.get(), Y.DWARF))
183           consumeError(std::move(Err));
184         else
185           S->content.reset();
186       }
187       Sections.push_back(std::move(*S));
188     } else
189       return S.takeError();
190   }
191   return reinterpret_cast<const char *>(Curr);
192 }
193 
194 template <typename StructType>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)195 Expected<const char *> MachODumper::processLoadCommandData(
196     MachOYAML::LoadCommand &LC,
197     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
198     MachOYAML::Object &Y) {
199   return LoadCmd.Ptr + sizeof(StructType);
200 }
201 
202 template <>
203 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)204 MachODumper::processLoadCommandData<MachO::segment_command>(
205     MachOYAML::LoadCommand &LC,
206     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
207     MachOYAML::Object &Y) {
208   return extractSections<MachO::section, MachO::segment_command>(
209       LoadCmd, LC.Sections, Y);
210 }
211 
212 template <>
213 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)214 MachODumper::processLoadCommandData<MachO::segment_command_64>(
215     MachOYAML::LoadCommand &LC,
216     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
217     MachOYAML::Object &Y) {
218   return extractSections<MachO::section_64, MachO::segment_command_64>(
219       LoadCmd, LC.Sections, Y);
220 }
221 
222 template <typename StructType>
223 const char *
readString(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd)224 readString(MachOYAML::LoadCommand &LC,
225            const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
226   auto Start = LoadCmd.Ptr + sizeof(StructType);
227   auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
228   auto Size = strnlen(Start, MaxSize);
229   LC.PayloadString = StringRef(Start, Size).str();
230   return Start + Size;
231 }
232 
233 template <>
234 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)235 MachODumper::processLoadCommandData<MachO::dylib_command>(
236     MachOYAML::LoadCommand &LC,
237     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
238     MachOYAML::Object &Y) {
239   return readString<MachO::dylib_command>(LC, LoadCmd);
240 }
241 
242 template <>
243 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)244 MachODumper::processLoadCommandData<MachO::dylinker_command>(
245     MachOYAML::LoadCommand &LC,
246     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
247     MachOYAML::Object &Y) {
248   return readString<MachO::dylinker_command>(LC, LoadCmd);
249 }
250 
251 template <>
252 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)253 MachODumper::processLoadCommandData<MachO::rpath_command>(
254     MachOYAML::LoadCommand &LC,
255     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
256     MachOYAML::Object &Y) {
257   return readString<MachO::rpath_command>(LC, LoadCmd);
258 }
259 
260 template <>
261 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)262 MachODumper::processLoadCommandData<MachO::build_version_command>(
263     MachOYAML::LoadCommand &LC,
264     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
265     MachOYAML::Object &Y) {
266   auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
267   auto NTools = LC.Data.build_version_command_data.ntools;
268   for (unsigned i = 0; i < NTools; ++i) {
269     auto Curr = Start + i * sizeof(MachO::build_tool_version);
270     MachO::build_tool_version BV;
271     memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
272     if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
273       MachO::swapStruct(BV);
274     LC.Tools.push_back(BV);
275   }
276   return Start + NTools * sizeof(MachO::build_tool_version);
277 }
278 
dump()279 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
280   auto Y = std::make_unique<MachOYAML::Object>();
281   Y->IsLittleEndian = Obj.isLittleEndian();
282   dumpHeader(Y);
283   if (Error Err = dumpLoadCommands(Y))
284     return std::move(Err);
285   dumpLinkEdit(Y);
286 
287   return std::move(Y);
288 }
289 
dumpHeader(std::unique_ptr<MachOYAML::Object> & Y)290 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
291   Y->Header.magic = Obj.getHeader().magic;
292   Y->Header.cputype = Obj.getHeader().cputype;
293   Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
294   Y->Header.filetype = Obj.getHeader().filetype;
295   Y->Header.ncmds = Obj.getHeader().ncmds;
296   Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
297   Y->Header.flags = Obj.getHeader().flags;
298   Y->Header.reserved = 0;
299 }
300 
dumpLoadCommands(std::unique_ptr<MachOYAML::Object> & Y)301 Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
302   for (auto LoadCmd : Obj.load_commands()) {
303     MachOYAML::LoadCommand LC;
304     const char *EndPtr = LoadCmd.Ptr;
305     switch (LoadCmd.C.cmd) {
306     default:
307       memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
308              sizeof(MachO::load_command));
309       if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
310         MachO::swapStruct(LC.Data.load_command_data);
311       if (Expected<const char *> ExpectedEndPtr =
312               processLoadCommandData<MachO::load_command>(LC, LoadCmd,
313                                                           *Y.get()))
314         EndPtr = *ExpectedEndPtr;
315       else
316         return ExpectedEndPtr.takeError();
317       break;
318 #include "llvm/BinaryFormat/MachO.def"
319     }
320     auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
321     if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
322                      [](const char C) { return C == 0; })) {
323       LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
324                              &EndPtr[RemainingBytes]);
325       RemainingBytes = 0;
326     }
327     LC.ZeroPadBytes = RemainingBytes;
328     Y->LoadCommands.push_back(std::move(LC));
329   }
330   return Error::success();
331 }
332 
dumpLinkEdit(std::unique_ptr<MachOYAML::Object> & Y)333 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
334   dumpRebaseOpcodes(Y);
335   dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
336   dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
337                   Obj.getDyldInfoWeakBindOpcodes());
338   dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
339                   true);
340   dumpExportTrie(Y);
341   dumpSymbols(Y);
342 }
343 
dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> & Y)344 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
345   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
346 
347   auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
348   for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
349        ++OpCode) {
350     MachOYAML::RebaseOpcode RebaseOp;
351     RebaseOp.Opcode =
352         static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
353     RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
354 
355     unsigned Count;
356     uint64_t ULEB = 0;
357 
358     switch (RebaseOp.Opcode) {
359     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
360 
361       ULEB = decodeULEB128(OpCode + 1, &Count);
362       RebaseOp.ExtraData.push_back(ULEB);
363       OpCode += Count;
364       LLVM_FALLTHROUGH;
365     // Intentionally no break here -- This opcode has two ULEB values
366     case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
367     case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
368     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
369     case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
370 
371       ULEB = decodeULEB128(OpCode + 1, &Count);
372       RebaseOp.ExtraData.push_back(ULEB);
373       OpCode += Count;
374       break;
375     default:
376       break;
377     }
378 
379     LEData.RebaseOpcodes.push_back(RebaseOp);
380 
381     if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
382       break;
383   }
384 }
385 
ReadStringRef(const uint8_t * Start)386 StringRef ReadStringRef(const uint8_t *Start) {
387   const uint8_t *Itr = Start;
388   for (; *Itr; ++Itr)
389     ;
390   return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
391 }
392 
dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> & BindOpcodes,ArrayRef<uint8_t> OpcodeBuffer,bool Lazy)393 void MachODumper::dumpBindOpcodes(
394     std::vector<MachOYAML::BindOpcode> &BindOpcodes,
395     ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
396   for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
397        ++OpCode) {
398     MachOYAML::BindOpcode BindOp;
399     BindOp.Opcode =
400         static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
401     BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
402 
403     unsigned Count;
404     uint64_t ULEB = 0;
405     int64_t SLEB = 0;
406 
407     switch (BindOp.Opcode) {
408     case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
409       ULEB = decodeULEB128(OpCode + 1, &Count);
410       BindOp.ULEBExtraData.push_back(ULEB);
411       OpCode += Count;
412       LLVM_FALLTHROUGH;
413     // Intentionally no break here -- this opcode has two ULEB values
414 
415     case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
416     case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
417     case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
418     case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
419       ULEB = decodeULEB128(OpCode + 1, &Count);
420       BindOp.ULEBExtraData.push_back(ULEB);
421       OpCode += Count;
422       break;
423 
424     case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
425       SLEB = decodeSLEB128(OpCode + 1, &Count);
426       BindOp.SLEBExtraData.push_back(SLEB);
427       OpCode += Count;
428       break;
429 
430     case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
431       BindOp.Symbol = ReadStringRef(OpCode + 1);
432       OpCode += BindOp.Symbol.size() + 1;
433       break;
434     default:
435       break;
436     }
437 
438     BindOpcodes.push_back(BindOp);
439 
440     // Lazy bindings have DONE opcodes between operations, so we need to keep
441     // processing after a DONE.
442     if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
443       break;
444   }
445 }
446 
447 /*!
 * \brief processes a node from the export trie, and its children.
449  *
450  * To my knowledge there is no documentation of the encoded format of this data
451  * other than in the heads of the Apple linker engineers. To that end hopefully
452  * this comment and the implementation below can serve to light the way for
453  * anyone crazy enough to come down this path in the future.
454  *
455  * This function reads and preserves the trie structure of the export trie. To
456  * my knowledge there is no code anywhere else that reads the data and preserves
457  * the Trie. LD64 (sources available at opensource.apple.com) has a similar
458  * implementation that parses the export trie into a vector. That code as well
459  * as LLVM's libObject MachO implementation were the basis for this.
460  *
461  * The export trie is an encoded trie. The node serialization is a bit awkward.
462  * The below pseudo-code is the best description I've come up with for it.
463  *
464  * struct SerializedNode {
465  *   ULEB128 TerminalSize;
466  *   struct TerminalData { <-- This is only present if TerminalSize > 0
467  *     ULEB128 Flags;
468  *     ULEB128 Address; <-- Present if (! Flags & REEXPORT )
469  *     ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
470  *                                     Flags & STUB_AND_RESOLVER )
471  *     char[] ImportName; <-- Present if ( Flags & REEXPORT )
472  *   }
473  *   uint8_t ChildrenCount;
474  *   Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
475  *   SerializedNode Children[ChildrenCount]
476  * }
477  *
478  * Terminal nodes are nodes that represent actual exports. They can appear
479  * anywhere in the tree other than at the root; they do not need to be leaf
480  * nodes. When reading the data out of the trie this routine reads it in-order,
481  * but it puts the child names and offsets directly into the child nodes. This
482  * results in looping over the children twice during serialization and
483  * de-serialization, but it makes the YAML representation more human readable.
484  *
485  * Below is an example of the graph from a "Hello World" executable:
486  *
487  * -------
488  * | ''  |
489  * -------
490  *    |
491  * -------
492  * | '_' |
493  * -------
494  *    |
495  *    |----------------------------------------|
496  *    |                                        |
497  *  ------------------------      ---------------------
498  *  | '_mh_execute_header' |      | 'main'            |
499  *  | Flags: 0x00000000    |      | Flags: 0x00000000 |
500  *  | Addr:  0x00000000    |      | Addr:  0x00001160 |
501  *  ------------------------      ---------------------
502  *
503  * This graph represents the trie for the exports "__mh_execute_header" and
504  * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
505  * terminal.
506 */
507 
processExportNode(const uint8_t * CurrPtr,const uint8_t * const End,MachOYAML::ExportEntry & Entry)508 const uint8_t *processExportNode(const uint8_t *CurrPtr,
509                                  const uint8_t *const End,
510                                  MachOYAML::ExportEntry &Entry) {
511   if (CurrPtr >= End)
512     return CurrPtr;
513   unsigned Count = 0;
514   Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
515   CurrPtr += Count;
516   if (Entry.TerminalSize != 0) {
517     Entry.Flags = decodeULEB128(CurrPtr, &Count);
518     CurrPtr += Count;
519     if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
520       Entry.Address = 0;
521       Entry.Other = decodeULEB128(CurrPtr, &Count);
522       CurrPtr += Count;
523       Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
524     } else {
525       Entry.Address = decodeULEB128(CurrPtr, &Count);
526       CurrPtr += Count;
527       if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
528         Entry.Other = decodeULEB128(CurrPtr, &Count);
529         CurrPtr += Count;
530       } else
531         Entry.Other = 0;
532     }
533   }
534   uint8_t childrenCount = *CurrPtr++;
535   if (childrenCount == 0)
536     return CurrPtr;
537 
538   Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
539                         MachOYAML::ExportEntry());
540   for (auto &Child : Entry.Children) {
541     Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
542     CurrPtr += Child.Name.length() + 1;
543     Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
544     CurrPtr += Count;
545   }
546   for (auto &Child : Entry.Children) {
547     CurrPtr = processExportNode(CurrPtr, End, Child);
548   }
549   return CurrPtr;
550 }
551 
dumpExportTrie(std::unique_ptr<MachOYAML::Object> & Y)552 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
553   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
554   auto ExportsTrie = Obj.getDyldInfoExportsTrie();
555   processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
556 }
557 
558 template <typename nlist_t>
constructNameList(const nlist_t & nlist)559 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
560   MachOYAML::NListEntry NL;
561   NL.n_strx = nlist.n_strx;
562   NL.n_type = nlist.n_type;
563   NL.n_sect = nlist.n_sect;
564   NL.n_desc = nlist.n_desc;
565   NL.n_value = nlist.n_value;
566   return NL;
567 }
568 
dumpSymbols(std::unique_ptr<MachOYAML::Object> & Y)569 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
570   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
571 
572   for (auto Symbol : Obj.symbols()) {
573     MachOYAML::NListEntry NLE =
574         Obj.is64Bit()
575             ? constructNameList<MachO::nlist_64>(
576                   Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
577             : constructNameList<MachO::nlist>(
578                   Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
579     LEData.NameList.push_back(NLE);
580   }
581 
582   StringRef RemainingTable = Obj.getStringTableData();
583   while (RemainingTable.size() > 0) {
584     auto SymbolPair = RemainingTable.split('\0');
585     RemainingTable = SymbolPair.second;
586     LEData.StringTable.push_back(SymbolPair.first);
587   }
588 }
589 
macho2yaml(raw_ostream & Out,const object::MachOObjectFile & Obj)590 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
591   std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
592   MachODumper Dumper(Obj, std::move(DCtx));
593   Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
594   if (!YAML)
595     return YAML.takeError();
596 
597   yaml::YamlObjectFile YAMLFile;
598   YAMLFile.MachO = std::move(YAML.get());
599 
600   yaml::Output Yout(Out);
601   Yout << YAMLFile;
602   return Error::success();
603 }
604 
macho2yaml(raw_ostream & Out,const object::MachOUniversalBinary & Obj)605 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
606   yaml::YamlObjectFile YAMLFile;
607   YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
608   MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
609   YAML.Header.magic = Obj.getMagic();
610   YAML.Header.nfat_arch = Obj.getNumberOfObjects();
611 
612   for (auto Slice : Obj.objects()) {
613     MachOYAML::FatArch arch;
614     arch.cputype = Slice.getCPUType();
615     arch.cpusubtype = Slice.getCPUSubType();
616     arch.offset = Slice.getOffset();
617     arch.size = Slice.getSize();
618     arch.align = Slice.getAlign();
619     arch.reserved = Slice.getReserved();
620     YAML.FatArchs.push_back(arch);
621 
622     auto SliceObj = Slice.getAsObjectFile();
623     if (!SliceObj)
624       return SliceObj.takeError();
625 
626     std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
627     MachODumper Dumper(*SliceObj.get(), std::move(DCtx));
628     Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
629     if (!YAMLObj)
630       return YAMLObj.takeError();
631     YAML.Slices.push_back(*YAMLObj.get());
632   }
633 
634   yaml::Output Yout(Out);
635   Yout << YAML;
636   return Error::success();
637 }
638 
macho2yaml(raw_ostream & Out,const object::Binary & Binary)639 Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) {
640   if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
641     return macho2yaml(Out, *MachOObj);
642 
643   if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
644     return macho2yaml(Out, *MachOObj);
645 
646   llvm_unreachable("unexpected Mach-O file format");
647 }
648