1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "obj2yaml.h"
10 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
11 #include "llvm/Object/MachOUniversal.h"
12 #include "llvm/ObjectYAML/DWARFYAML.h"
13 #include "llvm/ObjectYAML/ObjectYAML.h"
14 #include "llvm/Support/Errc.h"
15 #include "llvm/Support/Error.h"
16 #include "llvm/Support/ErrorHandling.h"
17 #include "llvm/Support/LEB128.h"
18
19 #include <string.h> // for memcpy
20
21 using namespace llvm;
22
23 class MachODumper {
24
25 template <typename StructType>
26 Expected<const char *> processLoadCommandData(
27 MachOYAML::LoadCommand &LC,
28 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
29 MachOYAML::Object &Y);
30
31 const object::MachOObjectFile &Obj;
32 std::unique_ptr<DWARFContext> DWARFCtx;
33 unsigned RawSegment;
34 void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
35 Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
36 void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
37 void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
38 void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y);
39 void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
40 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
41 void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
42 void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
43 void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y);
44 void dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y);
45 void dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y);
46
47 template <typename SectionType>
48 Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
49 size_t SecIndex);
50 template <typename SectionType>
51 Expected<MachOYAML::Section> constructSection(SectionType Sec,
52 size_t SecIndex);
53 template <typename SectionType, typename SegmentType>
54 Expected<const char *>
55 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
56 std::vector<MachOYAML::Section> &Sections,
57 MachOYAML::Object &Y);
58
59 public:
MachODumper(const object::MachOObjectFile & O,std::unique_ptr<DWARFContext> DCtx,unsigned RawSegments)60 MachODumper(const object::MachOObjectFile &O,
61 std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
62 : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {}
63 Expected<std::unique_ptr<MachOYAML::Object>> dump();
64 };
65
// One switch case per load command kind; expanded where MachO.def is included
// inside the switch of MachODumper::dumpLoadCommands. The expansion refers to
// that function's locals LC, LoadCmd, EndPtr and Y by name: it copies the
// fixed-size struct out of the file, byte-swaps it when file and host
// endianness differ, then lets processLoadCommandData consume trailing data.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName: {                                                        \
    memcpy((void *)&(LC.Data.LCStruct##_data), LoadCmd.Ptr,                    \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    Expected<const char *> ExpectedEndPtr =                                    \
        processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
    if (!ExpectedEndPtr)                                                       \
      return ExpectedEndPtr.takeError();                                       \
    EndPtr = *ExpectedEndPtr;                                                  \
    break;                                                                     \
  }
78
79 template <typename SectionType>
80 Expected<MachOYAML::Section>
constructSectionCommon(SectionType Sec,size_t SecIndex)81 MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
82 MachOYAML::Section TempSec;
83 memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
84 memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
85 TempSec.addr = Sec.addr;
86 TempSec.size = Sec.size;
87 TempSec.offset = Sec.offset;
88 TempSec.align = Sec.align;
89 TempSec.reloff = Sec.reloff;
90 TempSec.nreloc = Sec.nreloc;
91 TempSec.flags = Sec.flags;
92 TempSec.reserved1 = Sec.reserved1;
93 TempSec.reserved2 = Sec.reserved2;
94 TempSec.reserved3 = 0;
95 if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
96 TempSec.content =
97 yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
98
99 if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) {
100 TempSec.relocations.reserve(TempSec.nreloc);
101 for (const object::RelocationRef &Reloc : SecRef->relocations()) {
102 const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
103 const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
104 MachOYAML::Relocation R;
105 R.address = Obj.getAnyRelocationAddress(RE);
106 R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
107 R.length = Obj.getAnyRelocationLength(RE);
108 R.type = Obj.getAnyRelocationType(RE);
109 R.is_scattered = Obj.isRelocationScattered(RE);
110 R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
111 R.is_extern =
112 (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
113 R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
114 TempSec.relocations.push_back(R);
115 }
116 } else {
117 return SecRef.takeError();
118 }
119 return TempSec;
120 }
121
122 template <>
constructSection(MachO::section Sec,size_t SecIndex)123 Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
124 size_t SecIndex) {
125 Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
126 if (TempSec)
127 TempSec->reserved3 = 0;
128 return TempSec;
129 }
130
131 template <>
132 Expected<MachOYAML::Section>
constructSection(MachO::section_64 Sec,size_t SecIndex)133 MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
134 Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
135 if (TempSec)
136 TempSec->reserved3 = Sec.reserved3;
137 return TempSec;
138 }
139
dumpDebugSection(StringRef SecName,DWARFContext & DCtx,DWARFYAML::Data & DWARF)140 static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
141 DWARFYAML::Data &DWARF) {
142 if (SecName == "__debug_abbrev") {
143 dumpDebugAbbrev(DCtx, DWARF);
144 return Error::success();
145 }
146 if (SecName == "__debug_aranges")
147 return dumpDebugARanges(DCtx, DWARF);
148 if (SecName == "__debug_info") {
149 dumpDebugInfo(DCtx, DWARF);
150 return Error::success();
151 }
152 if (SecName == "__debug_line") {
153 dumpDebugLines(DCtx, DWARF);
154 return Error::success();
155 }
156 if (SecName.startswith("__debug_pub")) {
157 // FIXME: We should extract pub-section dumpers from this function.
158 dumpDebugPubSections(DCtx, DWARF);
159 return Error::success();
160 }
161 if (SecName == "__debug_ranges")
162 return dumpDebugRanges(DCtx, DWARF);
163 if (SecName == "__debug_str")
164 return dumpDebugStrings(DCtx, DWARF);
165 return createStringError(errc::not_supported,
166 "dumping " + SecName + " section is not supported");
167 }
168
169 template <typename SectionType, typename SegmentType>
extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,std::vector<MachOYAML::Section> & Sections,MachOYAML::Object & Y)170 Expected<const char *> MachODumper::extractSections(
171 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
172 std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
173 auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
174 const SectionType *Curr =
175 reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
176 for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
177 SectionType Sec;
178 memcpy((void *)&Sec, Curr, sizeof(SectionType));
179 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
180 MachO::swapStruct(Sec);
181 // For MachO section indices start from 1.
182 if (Expected<MachOYAML::Section> S =
183 constructSection(Sec, Sections.size() + 1)) {
184 StringRef SecName(S->sectname);
185
186 // Copy data sections if requested.
187 if ((RawSegment & ::RawSegments::data) &&
188 StringRef(S->segname).startswith("__DATA"))
189 S->content =
190 yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
191
192 if (SecName.startswith("__debug_")) {
193 // If the DWARF section cannot be successfully parsed, emit raw content
194 // instead of an entry in the DWARF section of the YAML.
195 if (Error Err = dumpDebugSection(SecName, *DWARFCtx, Y.DWARF))
196 consumeError(std::move(Err));
197 else
198 S->content.reset();
199 }
200 Sections.push_back(std::move(*S));
201 } else
202 return S.takeError();
203 }
204 return reinterpret_cast<const char *>(Curr);
205 }
206
207 template <typename StructType>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)208 Expected<const char *> MachODumper::processLoadCommandData(
209 MachOYAML::LoadCommand &LC,
210 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
211 MachOYAML::Object &Y) {
212 return LoadCmd.Ptr + sizeof(StructType);
213 }
214
215 template <>
216 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)217 MachODumper::processLoadCommandData<MachO::segment_command>(
218 MachOYAML::LoadCommand &LC,
219 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
220 MachOYAML::Object &Y) {
221 return extractSections<MachO::section, MachO::segment_command>(
222 LoadCmd, LC.Sections, Y);
223 }
224
225 template <>
226 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)227 MachODumper::processLoadCommandData<MachO::segment_command_64>(
228 MachOYAML::LoadCommand &LC,
229 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
230 MachOYAML::Object &Y) {
231 return extractSections<MachO::section_64, MachO::segment_command_64>(
232 LoadCmd, LC.Sections, Y);
233 }
234
235 template <typename StructType>
236 const char *
readString(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd)237 readString(MachOYAML::LoadCommand &LC,
238 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
239 auto Start = LoadCmd.Ptr + sizeof(StructType);
240 auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
241 auto Size = strnlen(Start, MaxSize);
242 LC.Content = StringRef(Start, Size).str();
243 return Start + Size;
244 }
245
246 template <>
247 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)248 MachODumper::processLoadCommandData<MachO::dylib_command>(
249 MachOYAML::LoadCommand &LC,
250 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
251 MachOYAML::Object &Y) {
252 return readString<MachO::dylib_command>(LC, LoadCmd);
253 }
254
255 template <>
256 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)257 MachODumper::processLoadCommandData<MachO::dylinker_command>(
258 MachOYAML::LoadCommand &LC,
259 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
260 MachOYAML::Object &Y) {
261 return readString<MachO::dylinker_command>(LC, LoadCmd);
262 }
263
264 template <>
265 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)266 MachODumper::processLoadCommandData<MachO::rpath_command>(
267 MachOYAML::LoadCommand &LC,
268 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
269 MachOYAML::Object &Y) {
270 return readString<MachO::rpath_command>(LC, LoadCmd);
271 }
272
273 template <>
274 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)275 MachODumper::processLoadCommandData<MachO::build_version_command>(
276 MachOYAML::LoadCommand &LC,
277 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
278 MachOYAML::Object &Y) {
279 auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
280 auto NTools = LC.Data.build_version_command_data.ntools;
281 for (unsigned i = 0; i < NTools; ++i) {
282 auto Curr = Start + i * sizeof(MachO::build_tool_version);
283 MachO::build_tool_version BV;
284 memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
285 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
286 MachO::swapStruct(BV);
287 LC.Tools.push_back(BV);
288 }
289 return Start + NTools * sizeof(MachO::build_tool_version);
290 }
291
dump()292 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
293 auto Y = std::make_unique<MachOYAML::Object>();
294 Y->IsLittleEndian = Obj.isLittleEndian();
295 dumpHeader(Y);
296 if (Error Err = dumpLoadCommands(Y))
297 return std::move(Err);
298 if (RawSegment & ::RawSegments::linkedit)
299 Y->RawLinkEditSegment =
300 yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
301 else
302 dumpLinkEdit(Y);
303
304 return std::move(Y);
305 }
306
dumpHeader(std::unique_ptr<MachOYAML::Object> & Y)307 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
308 Y->Header.magic = Obj.getHeader().magic;
309 Y->Header.cputype = Obj.getHeader().cputype;
310 Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
311 Y->Header.filetype = Obj.getHeader().filetype;
312 Y->Header.ncmds = Obj.getHeader().ncmds;
313 Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
314 Y->Header.flags = Obj.getHeader().flags;
315 Y->Header.reserved = 0;
316 }
317
dumpLoadCommands(std::unique_ptr<MachOYAML::Object> & Y)318 Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
319 for (auto LoadCmd : Obj.load_commands()) {
320 MachOYAML::LoadCommand LC;
321 const char *EndPtr = LoadCmd.Ptr;
322 switch (LoadCmd.C.cmd) {
323 default:
324 memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
325 sizeof(MachO::load_command));
326 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
327 MachO::swapStruct(LC.Data.load_command_data);
328 if (Expected<const char *> ExpectedEndPtr =
329 processLoadCommandData<MachO::load_command>(LC, LoadCmd, *Y))
330 EndPtr = *ExpectedEndPtr;
331 else
332 return ExpectedEndPtr.takeError();
333 break;
334 #include "llvm/BinaryFormat/MachO.def"
335 }
336 auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
337 if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
338 [](const char C) { return C == 0; })) {
339 LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
340 &EndPtr[RemainingBytes]);
341 RemainingBytes = 0;
342 }
343 LC.ZeroPadBytes = RemainingBytes;
344 Y->LoadCommands.push_back(std::move(LC));
345 }
346 return Error::success();
347 }
348
dumpLinkEdit(std::unique_ptr<MachOYAML::Object> & Y)349 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
350 dumpRebaseOpcodes(Y);
351 dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
352 dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
353 Obj.getDyldInfoWeakBindOpcodes());
354 dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
355 true);
356 dumpExportTrie(Y);
357 dumpSymbols(Y);
358 dumpIndirectSymbols(Y);
359 dumpFunctionStarts(Y);
360 dumpChainedFixups(Y);
361 dumpDataInCode(Y);
362 }
363
dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> & Y)364 void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) {
365 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
366
367 auto FunctionStarts = Obj.getFunctionStarts();
368 for (auto Addr : FunctionStarts)
369 LEData.FunctionStarts.push_back(Addr);
370 }
371
dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> & Y)372 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
373 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
374
375 auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
376 for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
377 ++OpCode) {
378 MachOYAML::RebaseOpcode RebaseOp;
379 RebaseOp.Opcode =
380 static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
381 RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
382
383 unsigned Count;
384 uint64_t ULEB = 0;
385
386 switch (RebaseOp.Opcode) {
387 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
388
389 ULEB = decodeULEB128(OpCode + 1, &Count);
390 RebaseOp.ExtraData.push_back(ULEB);
391 OpCode += Count;
392 [[fallthrough]];
393 // Intentionally no break here -- This opcode has two ULEB values
394 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
395 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
396 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
397 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
398
399 ULEB = decodeULEB128(OpCode + 1, &Count);
400 RebaseOp.ExtraData.push_back(ULEB);
401 OpCode += Count;
402 break;
403 default:
404 break;
405 }
406
407 LEData.RebaseOpcodes.push_back(RebaseOp);
408
409 if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
410 break;
411 }
412 }
413
ReadStringRef(const uint8_t * Start)414 StringRef ReadStringRef(const uint8_t *Start) {
415 const uint8_t *Itr = Start;
416 for (; *Itr; ++Itr)
417 ;
418 return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
419 }
420
dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> & BindOpcodes,ArrayRef<uint8_t> OpcodeBuffer,bool Lazy)421 void MachODumper::dumpBindOpcodes(
422 std::vector<MachOYAML::BindOpcode> &BindOpcodes,
423 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
424 for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
425 ++OpCode) {
426 MachOYAML::BindOpcode BindOp;
427 BindOp.Opcode =
428 static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
429 BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
430
431 unsigned Count;
432 uint64_t ULEB = 0;
433 int64_t SLEB = 0;
434
435 switch (BindOp.Opcode) {
436 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
437 ULEB = decodeULEB128(OpCode + 1, &Count);
438 BindOp.ULEBExtraData.push_back(ULEB);
439 OpCode += Count;
440 [[fallthrough]];
441 // Intentionally no break here -- this opcode has two ULEB values
442
443 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
444 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
445 case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
446 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
447 ULEB = decodeULEB128(OpCode + 1, &Count);
448 BindOp.ULEBExtraData.push_back(ULEB);
449 OpCode += Count;
450 break;
451
452 case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
453 SLEB = decodeSLEB128(OpCode + 1, &Count);
454 BindOp.SLEBExtraData.push_back(SLEB);
455 OpCode += Count;
456 break;
457
458 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
459 BindOp.Symbol = ReadStringRef(OpCode + 1);
460 OpCode += BindOp.Symbol.size() + 1;
461 break;
462 default:
463 break;
464 }
465
466 BindOpcodes.push_back(BindOp);
467
468 // Lazy bindings have DONE opcodes between operations, so we need to keep
469 // processing after a DONE.
470 if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
471 break;
472 }
473 }
474
475 /*!
 * \brief Processes a node from the export trie, and its children.
477 *
478 * To my knowledge there is no documentation of the encoded format of this data
479 * other than in the heads of the Apple linker engineers. To that end hopefully
480 * this comment and the implementation below can serve to light the way for
481 * anyone crazy enough to come down this path in the future.
482 *
483 * This function reads and preserves the trie structure of the export trie. To
484 * my knowledge there is no code anywhere else that reads the data and preserves
485 * the Trie. LD64 (sources available at opensource.apple.com) has a similar
486 * implementation that parses the export trie into a vector. That code as well
487 * as LLVM's libObject MachO implementation were the basis for this.
488 *
489 * The export trie is an encoded trie. The node serialization is a bit awkward.
490 * The below pseudo-code is the best description I've come up with for it.
491 *
492 * struct SerializedNode {
493 * ULEB128 TerminalSize;
494 * struct TerminalData { <-- This is only present if TerminalSize > 0
495 * ULEB128 Flags;
 *      ULEB128 Address; <-- Present if ( !(Flags & REEXPORT) )
497 * ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
498 * Flags & STUB_AND_RESOLVER )
499 * char[] ImportName; <-- Present if ( Flags & REEXPORT )
500 * }
501 * uint8_t ChildrenCount;
502 * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
503 * SerializedNode Children[ChildrenCount]
504 * }
505 *
506 * Terminal nodes are nodes that represent actual exports. They can appear
507 * anywhere in the tree other than at the root; they do not need to be leaf
508 * nodes. When reading the data out of the trie this routine reads it in-order,
509 * but it puts the child names and offsets directly into the child nodes. This
510 * results in looping over the children twice during serialization and
511 * de-serialization, but it makes the YAML representation more human readable.
512 *
513 * Below is an example of the graph from a "Hello World" executable:
514 *
515 * -------
516 * | '' |
517 * -------
518 * |
519 * -------
520 * | '_' |
521 * -------
522 * |
523 * |----------------------------------------|
524 * | |
525 * ------------------------ ---------------------
526 * | '_mh_execute_header' | | 'main' |
527 * | Flags: 0x00000000 | | Flags: 0x00000000 |
528 * | Addr: 0x00000000 | | Addr: 0x00001160 |
529 * ------------------------ ---------------------
530 *
531 * This graph represents the trie for the exports "__mh_execute_header" and
532 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
533 * terminal.
534 */
535
processExportNode(const uint8_t * CurrPtr,const uint8_t * const End,MachOYAML::ExportEntry & Entry)536 const uint8_t *processExportNode(const uint8_t *CurrPtr,
537 const uint8_t *const End,
538 MachOYAML::ExportEntry &Entry) {
539 if (CurrPtr >= End)
540 return CurrPtr;
541 unsigned Count = 0;
542 Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
543 CurrPtr += Count;
544 if (Entry.TerminalSize != 0) {
545 Entry.Flags = decodeULEB128(CurrPtr, &Count);
546 CurrPtr += Count;
547 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
548 Entry.Address = 0;
549 Entry.Other = decodeULEB128(CurrPtr, &Count);
550 CurrPtr += Count;
551 Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
552 } else {
553 Entry.Address = decodeULEB128(CurrPtr, &Count);
554 CurrPtr += Count;
555 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
556 Entry.Other = decodeULEB128(CurrPtr, &Count);
557 CurrPtr += Count;
558 } else
559 Entry.Other = 0;
560 }
561 }
562 uint8_t childrenCount = *CurrPtr++;
563 if (childrenCount == 0)
564 return CurrPtr;
565
566 Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
567 MachOYAML::ExportEntry());
568 for (auto &Child : Entry.Children) {
569 Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
570 CurrPtr += Child.Name.length() + 1;
571 Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
572 CurrPtr += Count;
573 }
574 for (auto &Child : Entry.Children) {
575 CurrPtr = processExportNode(CurrPtr, End, Child);
576 }
577 return CurrPtr;
578 }
579
dumpExportTrie(std::unique_ptr<MachOYAML::Object> & Y)580 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
581 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
582 // The exports trie can be in LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE
583 auto ExportsTrie = Obj.getDyldInfoExportsTrie();
584 if (ExportsTrie.empty())
585 ExportsTrie = Obj.getDyldExportsTrie();
586 processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
587 }
588
589 template <typename nlist_t>
constructNameList(const nlist_t & nlist)590 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
591 MachOYAML::NListEntry NL;
592 NL.n_strx = nlist.n_strx;
593 NL.n_type = nlist.n_type;
594 NL.n_sect = nlist.n_sect;
595 NL.n_desc = nlist.n_desc;
596 NL.n_value = nlist.n_value;
597 return NL;
598 }
599
dumpSymbols(std::unique_ptr<MachOYAML::Object> & Y)600 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
601 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
602
603 for (auto Symbol : Obj.symbols()) {
604 MachOYAML::NListEntry NLE =
605 Obj.is64Bit()
606 ? constructNameList<MachO::nlist_64>(
607 Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
608 : constructNameList<MachO::nlist>(
609 Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
610 LEData.NameList.push_back(NLE);
611 }
612
613 StringRef RemainingTable = Obj.getStringTableData();
614 while (RemainingTable.size() > 0) {
615 auto SymbolPair = RemainingTable.split('\0');
616 RemainingTable = SymbolPair.second;
617 LEData.StringTable.push_back(SymbolPair.first);
618 }
619 }
620
dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> & Y)621 void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
622 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
623
624 MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand();
625 for (unsigned i = 0; i < DLC.nindirectsyms; ++i)
626 LEData.IndirectSymbols.push_back(Obj.getIndirectSymbolTableEntry(DLC, i));
627 }
628
dumpChainedFixups(std::unique_ptr<MachOYAML::Object> & Y)629 void MachODumper::dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y) {
630 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
631
632 for (const auto &LC : Y->LoadCommands) {
633 if (LC.Data.load_command_data.cmd == llvm::MachO::LC_DYLD_CHAINED_FIXUPS) {
634 const MachO::linkedit_data_command &DC =
635 LC.Data.linkedit_data_command_data;
636 if (DC.dataoff) {
637 assert(DC.dataoff < Obj.getData().size());
638 assert(DC.dataoff + DC.datasize <= Obj.getData().size());
639 const char *Bytes = Obj.getData().data() + DC.dataoff;
640 for (size_t Idx = 0; Idx < DC.datasize; Idx++) {
641 LEData.ChainedFixups.push_back(Bytes[Idx]);
642 }
643 }
644 break;
645 }
646 }
647 }
648
dumpDataInCode(std::unique_ptr<MachOYAML::Object> & Y)649 void MachODumper::dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y) {
650 MachOYAML::LinkEditData &LEData = Y->LinkEdit;
651
652 MachO::linkedit_data_command DIC = Obj.getDataInCodeLoadCommand();
653 uint32_t NumEntries = DIC.datasize / sizeof(MachO::data_in_code_entry);
654 for (uint32_t Idx = 0; Idx < NumEntries; ++Idx) {
655 MachO::data_in_code_entry DICE =
656 Obj.getDataInCodeTableEntry(DIC.dataoff, Idx);
657 MachOYAML::DataInCodeEntry Entry{DICE.offset, DICE.length, DICE.kind};
658 LEData.DataInCode.emplace_back(Entry);
659 }
660 }
661
macho2yaml(raw_ostream & Out,const object::MachOObjectFile & Obj,unsigned RawSegments)662 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
663 unsigned RawSegments) {
664 std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
665 MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
666 Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
667 if (!YAML)
668 return YAML.takeError();
669
670 yaml::YamlObjectFile YAMLFile;
671 YAMLFile.MachO = std::move(YAML.get());
672
673 yaml::Output Yout(Out);
674 Yout << YAMLFile;
675 return Error::success();
676 }
677
macho2yaml(raw_ostream & Out,const object::MachOUniversalBinary & Obj,unsigned RawSegments)678 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
679 unsigned RawSegments) {
680 yaml::YamlObjectFile YAMLFile;
681 YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
682 MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
683 YAML.Header.magic = Obj.getMagic();
684 YAML.Header.nfat_arch = Obj.getNumberOfObjects();
685
686 for (auto Slice : Obj.objects()) {
687 MachOYAML::FatArch arch;
688 arch.cputype = Slice.getCPUType();
689 arch.cpusubtype = Slice.getCPUSubType();
690 arch.offset = Slice.getOffset();
691 arch.size = Slice.getSize();
692 arch.align = Slice.getAlign();
693 arch.reserved = Slice.getReserved();
694 YAML.FatArchs.push_back(arch);
695
696 auto SliceObj = Slice.getAsObjectFile();
697 if (!SliceObj)
698 return SliceObj.takeError();
699
700 std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
701 MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
702 Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
703 if (!YAMLObj)
704 return YAMLObj.takeError();
705 YAML.Slices.push_back(*YAMLObj.get());
706 }
707
708 yaml::Output Yout(Out);
709 Yout << YAML;
710 return Error::success();
711 }
712
macho2yaml(raw_ostream & Out,const object::Binary & Binary,unsigned RawSegments)713 Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
714 unsigned RawSegments) {
715 if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
716 return macho2yaml(Out, *MachOObj, RawSegments);
717
718 if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
719 return macho2yaml(Out, *MachOObj, RawSegments);
720
721 llvm_unreachable("unexpected Mach-O file format");
722 }
723