1 //===- MachOWriter.cpp ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MachOWriter.h"
10 #include "MachOLayoutBuilder.h"
11 #include "Object.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/BinaryFormat/MachO.h"
14 #include "llvm/Object/MachO.h"
15 #include "llvm/Support/Errc.h"
16 #include "llvm/Support/ErrorHandling.h"
17 #include <memory>
18
19 namespace llvm {
20 namespace objcopy {
21 namespace macho {
22
headerSize() const23 size_t MachOWriter::headerSize() const {
24 return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
25 }
26
loadCommandsSize() const27 size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; }
28
symTableSize() const29 size_t MachOWriter::symTableSize() const {
30 return O.SymTable.Symbols.size() *
31 (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist));
32 }
33
totalSize() const34 size_t MachOWriter::totalSize() const {
35 // Going from tail to head and looking for an appropriate "anchor" to
36 // calculate the total size assuming that all the offsets are either valid
37 // ("true") or 0 (0 indicates that the corresponding part is missing).
38
39 SmallVector<size_t, 7> Ends;
40 if (O.SymTabCommandIndex) {
41 const MachO::symtab_command &SymTabCommand =
42 O.LoadCommands[*O.SymTabCommandIndex]
43 .MachOLoadCommand.symtab_command_data;
44 if (SymTabCommand.symoff)
45 Ends.push_back(SymTabCommand.symoff + symTableSize());
46 if (SymTabCommand.stroff)
47 Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
48 }
49 if (O.DyLdInfoCommandIndex) {
50 const MachO::dyld_info_command &DyLdInfoCommand =
51 O.LoadCommands[*O.DyLdInfoCommandIndex]
52 .MachOLoadCommand.dyld_info_command_data;
53 if (DyLdInfoCommand.rebase_off) {
54 assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
55 "Incorrect rebase opcodes size");
56 Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size);
57 }
58 if (DyLdInfoCommand.bind_off) {
59 assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
60 "Incorrect bind opcodes size");
61 Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size);
62 }
63 if (DyLdInfoCommand.weak_bind_off) {
64 assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
65 "Incorrect weak bind opcodes size");
66 Ends.push_back(DyLdInfoCommand.weak_bind_off +
67 DyLdInfoCommand.weak_bind_size);
68 }
69 if (DyLdInfoCommand.lazy_bind_off) {
70 assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
71 "Incorrect lazy bind opcodes size");
72 Ends.push_back(DyLdInfoCommand.lazy_bind_off +
73 DyLdInfoCommand.lazy_bind_size);
74 }
75 if (DyLdInfoCommand.export_off) {
76 assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
77 "Incorrect trie size");
78 Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size);
79 }
80 }
81
82 if (O.DySymTabCommandIndex) {
83 const MachO::dysymtab_command &DySymTabCommand =
84 O.LoadCommands[*O.DySymTabCommandIndex]
85 .MachOLoadCommand.dysymtab_command_data;
86
87 if (DySymTabCommand.indirectsymoff)
88 Ends.push_back(DySymTabCommand.indirectsymoff +
89 sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
90 }
91
92 if (O.DataInCodeCommandIndex) {
93 const MachO::linkedit_data_command &LinkEditDataCommand =
94 O.LoadCommands[*O.DataInCodeCommandIndex]
95 .MachOLoadCommand.linkedit_data_command_data;
96
97 if (LinkEditDataCommand.dataoff)
98 Ends.push_back(LinkEditDataCommand.dataoff +
99 LinkEditDataCommand.datasize);
100 }
101
102 if (O.FunctionStartsCommandIndex) {
103 const MachO::linkedit_data_command &LinkEditDataCommand =
104 O.LoadCommands[*O.FunctionStartsCommandIndex]
105 .MachOLoadCommand.linkedit_data_command_data;
106
107 if (LinkEditDataCommand.dataoff)
108 Ends.push_back(LinkEditDataCommand.dataoff +
109 LinkEditDataCommand.datasize);
110 }
111
112 // Otherwise, use the last section / reloction.
113 for (const auto &LC : O.LoadCommands)
114 for (const auto &S : LC.Sections) {
115 Ends.push_back(S.Offset + S.Size);
116 if (S.RelOff)
117 Ends.push_back(S.RelOff +
118 S.NReloc * sizeof(MachO::any_relocation_info));
119 }
120
121 if (!Ends.empty())
122 return *std::max_element(Ends.begin(), Ends.end());
123
124 // Otherwise, we have only Mach header and load commands.
125 return headerSize() + loadCommandsSize();
126 }
127
writeHeader()128 void MachOWriter::writeHeader() {
129 MachO::mach_header_64 Header;
130
131 Header.magic = O.Header.Magic;
132 Header.cputype = O.Header.CPUType;
133 Header.cpusubtype = O.Header.CPUSubType;
134 Header.filetype = O.Header.FileType;
135 Header.ncmds = O.Header.NCmds;
136 Header.sizeofcmds = O.Header.SizeOfCmds;
137 Header.flags = O.Header.Flags;
138 Header.reserved = O.Header.Reserved;
139
140 if (IsLittleEndian != sys::IsLittleEndianHost)
141 MachO::swapStruct(Header);
142
143 auto HeaderSize =
144 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
145 memcpy(B.getBufferStart(), &Header, HeaderSize);
146 }
147
writeLoadCommands()148 void MachOWriter::writeLoadCommands() {
149 uint8_t *Begin = B.getBufferStart() + headerSize();
150 for (const auto &LC : O.LoadCommands) {
151 // Construct a load command.
152 MachO::macho_load_command MLC = LC.MachOLoadCommand;
153 switch (MLC.load_command_data.cmd) {
154 case MachO::LC_SEGMENT:
155 if (IsLittleEndian != sys::IsLittleEndianHost)
156 MachO::swapStruct(MLC.segment_command_data);
157 memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
158 Begin += sizeof(MachO::segment_command);
159
160 for (const auto &Sec : LC.Sections)
161 writeSectionInLoadCommand<MachO::section>(Sec, Begin);
162 continue;
163 case MachO::LC_SEGMENT_64:
164 if (IsLittleEndian != sys::IsLittleEndianHost)
165 MachO::swapStruct(MLC.segment_command_64_data);
166 memcpy(Begin, &MLC.segment_command_64_data,
167 sizeof(MachO::segment_command_64));
168 Begin += sizeof(MachO::segment_command_64);
169
170 for (const auto &Sec : LC.Sections)
171 writeSectionInLoadCommand<MachO::section_64>(Sec, Begin);
172 continue;
173 }
174
175 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
176 case MachO::LCName: \
177 assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \
178 MLC.load_command_data.cmdsize); \
179 if (IsLittleEndian != sys::IsLittleEndianHost) \
180 MachO::swapStruct(MLC.LCStruct##_data); \
181 memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \
182 Begin += sizeof(MachO::LCStruct); \
183 if (!LC.Payload.empty()) \
184 memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \
185 Begin += LC.Payload.size(); \
186 break;
187
188 // Copy the load command as it is.
189 switch (MLC.load_command_data.cmd) {
190 default:
191 assert(sizeof(MachO::load_command) + LC.Payload.size() ==
192 MLC.load_command_data.cmdsize);
193 if (IsLittleEndian != sys::IsLittleEndianHost)
194 MachO::swapStruct(MLC.load_command_data);
195 memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
196 Begin += sizeof(MachO::load_command);
197 if (!LC.Payload.empty())
198 memcpy(Begin, LC.Payload.data(), LC.Payload.size());
199 Begin += LC.Payload.size();
200 break;
201 #include "llvm/BinaryFormat/MachO.def"
202 }
203 }
204 }
205
206 template <typename StructType>
writeSectionInLoadCommand(const Section & Sec,uint8_t * & Out)207 void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
208 StructType Temp;
209 assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name");
210 assert(Sec.Sectname.size() <= sizeof(Temp.sectname) &&
211 "too long section name");
212 memset(&Temp, 0, sizeof(StructType));
213 memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size());
214 memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size());
215 Temp.addr = Sec.Addr;
216 Temp.size = Sec.Size;
217 Temp.offset = Sec.Offset;
218 Temp.align = Sec.Align;
219 Temp.reloff = Sec.RelOff;
220 Temp.nreloc = Sec.NReloc;
221 Temp.flags = Sec.Flags;
222 Temp.reserved1 = Sec.Reserved1;
223 Temp.reserved2 = Sec.Reserved2;
224
225 if (IsLittleEndian != sys::IsLittleEndianHost)
226 MachO::swapStruct(Temp);
227 memcpy(Out, &Temp, sizeof(StructType));
228 Out += sizeof(StructType);
229 }
230
writeSections()231 void MachOWriter::writeSections() {
232 for (const auto &LC : O.LoadCommands)
233 for (const auto &Sec : LC.Sections) {
234 if (Sec.isVirtualSection())
235 continue;
236
237 assert(Sec.Offset && "Section offset can not be zero");
238 assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
239 memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
240 Sec.Content.size());
241 for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) {
242 auto RelocInfo = Sec.Relocations[Index];
243 if (!RelocInfo.Scattered) {
244 auto *Info =
245 reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info);
246 Info->r_symbolnum = RelocInfo.Symbol->Index;
247 }
248
249 if (IsLittleEndian != sys::IsLittleEndianHost)
250 MachO::swapStruct(
251 reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info));
252 memcpy(B.getBufferStart() + Sec.RelOff +
253 Index * sizeof(MachO::any_relocation_info),
254 &RelocInfo.Info, sizeof(RelocInfo.Info));
255 }
256 }
257 }
258
259 template <typename NListType>
writeNListEntry(const SymbolEntry & SE,bool IsLittleEndian,char * & Out,uint32_t Nstrx)260 void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out,
261 uint32_t Nstrx) {
262 NListType ListEntry;
263 ListEntry.n_strx = Nstrx;
264 ListEntry.n_type = SE.n_type;
265 ListEntry.n_sect = SE.n_sect;
266 ListEntry.n_desc = SE.n_desc;
267 ListEntry.n_value = SE.n_value;
268
269 if (IsLittleEndian != sys::IsLittleEndianHost)
270 MachO::swapStruct(ListEntry);
271 memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType));
272 Out += sizeof(NListType);
273 }
274
writeStringTable()275 void MachOWriter::writeStringTable() {
276 if (!O.SymTabCommandIndex)
277 return;
278 const MachO::symtab_command &SymTabCommand =
279 O.LoadCommands[*O.SymTabCommandIndex]
280 .MachOLoadCommand.symtab_command_data;
281
282 uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
283 LayoutBuilder.getStringTableBuilder().write(StrTable);
284 }
285
writeSymbolTable()286 void MachOWriter::writeSymbolTable() {
287 if (!O.SymTabCommandIndex)
288 return;
289 const MachO::symtab_command &SymTabCommand =
290 O.LoadCommands[*O.SymTabCommandIndex]
291 .MachOLoadCommand.symtab_command_data;
292
293 char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff;
294 for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
295 Iter != End; Iter++) {
296 SymbolEntry *Sym = Iter->get();
297 uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
298
299 if (Is64Bit)
300 writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
301 else
302 writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx);
303 }
304 }
305
writeRebaseInfo()306 void MachOWriter::writeRebaseInfo() {
307 if (!O.DyLdInfoCommandIndex)
308 return;
309 const MachO::dyld_info_command &DyLdInfoCommand =
310 O.LoadCommands[*O.DyLdInfoCommandIndex]
311 .MachOLoadCommand.dyld_info_command_data;
312 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.rebase_off;
313 assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
314 "Incorrect rebase opcodes size");
315 memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size());
316 }
317
writeBindInfo()318 void MachOWriter::writeBindInfo() {
319 if (!O.DyLdInfoCommandIndex)
320 return;
321 const MachO::dyld_info_command &DyLdInfoCommand =
322 O.LoadCommands[*O.DyLdInfoCommandIndex]
323 .MachOLoadCommand.dyld_info_command_data;
324 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.bind_off;
325 assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
326 "Incorrect bind opcodes size");
327 memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size());
328 }
329
writeWeakBindInfo()330 void MachOWriter::writeWeakBindInfo() {
331 if (!O.DyLdInfoCommandIndex)
332 return;
333 const MachO::dyld_info_command &DyLdInfoCommand =
334 O.LoadCommands[*O.DyLdInfoCommandIndex]
335 .MachOLoadCommand.dyld_info_command_data;
336 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.weak_bind_off;
337 assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
338 "Incorrect weak bind opcodes size");
339 memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size());
340 }
341
writeLazyBindInfo()342 void MachOWriter::writeLazyBindInfo() {
343 if (!O.DyLdInfoCommandIndex)
344 return;
345 const MachO::dyld_info_command &DyLdInfoCommand =
346 O.LoadCommands[*O.DyLdInfoCommandIndex]
347 .MachOLoadCommand.dyld_info_command_data;
348 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.lazy_bind_off;
349 assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
350 "Incorrect lazy bind opcodes size");
351 memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size());
352 }
353
writeExportInfo()354 void MachOWriter::writeExportInfo() {
355 if (!O.DyLdInfoCommandIndex)
356 return;
357 const MachO::dyld_info_command &DyLdInfoCommand =
358 O.LoadCommands[*O.DyLdInfoCommandIndex]
359 .MachOLoadCommand.dyld_info_command_data;
360 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.export_off;
361 assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
362 "Incorrect export trie size");
363 memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
364 }
365
writeIndirectSymbolTable()366 void MachOWriter::writeIndirectSymbolTable() {
367 if (!O.DySymTabCommandIndex)
368 return;
369
370 const MachO::dysymtab_command &DySymTabCommand =
371 O.LoadCommands[*O.DySymTabCommandIndex]
372 .MachOLoadCommand.dysymtab_command_data;
373
374 uint32_t *Out =
375 (uint32_t *)(B.getBufferStart() + DySymTabCommand.indirectsymoff);
376 for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) {
377 uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex;
378 if (IsLittleEndian != sys::IsLittleEndianHost)
379 sys::swapByteOrder(Entry);
380 *Out++ = Entry;
381 }
382 }
383
writeDataInCodeData()384 void MachOWriter::writeDataInCodeData() {
385 if (!O.DataInCodeCommandIndex)
386 return;
387 const MachO::linkedit_data_command &LinkEditDataCommand =
388 O.LoadCommands[*O.DataInCodeCommandIndex]
389 .MachOLoadCommand.linkedit_data_command_data;
390 char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
391 assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
392 "Incorrect data in code data size");
393 memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size());
394 }
395
writeFunctionStartsData()396 void MachOWriter::writeFunctionStartsData() {
397 if (!O.FunctionStartsCommandIndex)
398 return;
399 const MachO::linkedit_data_command &LinkEditDataCommand =
400 O.LoadCommands[*O.FunctionStartsCommandIndex]
401 .MachOLoadCommand.linkedit_data_command_data;
402 char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
403 assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
404 "Incorrect function starts data size");
405 memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
406 }
407
writeTail()408 void MachOWriter::writeTail() {
409 typedef void (MachOWriter::*WriteHandlerType)(void);
410 typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
411 SmallVector<WriteOperation, 7> Queue;
412
413 if (O.SymTabCommandIndex) {
414 const MachO::symtab_command &SymTabCommand =
415 O.LoadCommands[*O.SymTabCommandIndex]
416 .MachOLoadCommand.symtab_command_data;
417 if (SymTabCommand.symoff)
418 Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable});
419 if (SymTabCommand.stroff)
420 Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable});
421 }
422
423 if (O.DyLdInfoCommandIndex) {
424 const MachO::dyld_info_command &DyLdInfoCommand =
425 O.LoadCommands[*O.DyLdInfoCommandIndex]
426 .MachOLoadCommand.dyld_info_command_data;
427 if (DyLdInfoCommand.rebase_off)
428 Queue.push_back(
429 {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo});
430 if (DyLdInfoCommand.bind_off)
431 Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo});
432 if (DyLdInfoCommand.weak_bind_off)
433 Queue.push_back(
434 {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo});
435 if (DyLdInfoCommand.lazy_bind_off)
436 Queue.push_back(
437 {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo});
438 if (DyLdInfoCommand.export_off)
439 Queue.push_back(
440 {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
441 }
442
443 if (O.DySymTabCommandIndex) {
444 const MachO::dysymtab_command &DySymTabCommand =
445 O.LoadCommands[*O.DySymTabCommandIndex]
446 .MachOLoadCommand.dysymtab_command_data;
447
448 if (DySymTabCommand.indirectsymoff)
449 Queue.emplace_back(DySymTabCommand.indirectsymoff,
450 &MachOWriter::writeIndirectSymbolTable);
451 }
452
453 if (O.DataInCodeCommandIndex) {
454 const MachO::linkedit_data_command &LinkEditDataCommand =
455 O.LoadCommands[*O.DataInCodeCommandIndex]
456 .MachOLoadCommand.linkedit_data_command_data;
457
458 if (LinkEditDataCommand.dataoff)
459 Queue.emplace_back(LinkEditDataCommand.dataoff,
460 &MachOWriter::writeDataInCodeData);
461 }
462
463 if (O.FunctionStartsCommandIndex) {
464 const MachO::linkedit_data_command &LinkEditDataCommand =
465 O.LoadCommands[*O.FunctionStartsCommandIndex]
466 .MachOLoadCommand.linkedit_data_command_data;
467
468 if (LinkEditDataCommand.dataoff)
469 Queue.emplace_back(LinkEditDataCommand.dataoff,
470 &MachOWriter::writeFunctionStartsData);
471 }
472
473 llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
474 return LHS.first < RHS.first;
475 });
476
477 for (auto WriteOp : Queue)
478 (this->*WriteOp.second)();
479 }
480
finalize()481 Error MachOWriter::finalize() { return LayoutBuilder.layout(); }
482
write()483 Error MachOWriter::write() {
484 if (Error E = B.allocate(totalSize()))
485 return E;
486 memset(B.getBufferStart(), 0, totalSize());
487 writeHeader();
488 writeLoadCommands();
489 writeSections();
490 writeTail();
491 return B.commit();
492 }
493
494 } // end namespace macho
495 } // end namespace objcopy
496 } // end namespace llvm
497