1 //===- MachOWriter.cpp ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOWriter.h"
10 #include "MachOLayoutBuilder.h"
11 #include "Object.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/BinaryFormat/MachO.h"
14 #include "llvm/Object/MachO.h"
15 #include "llvm/Support/Errc.h"
16 #include "llvm/Support/ErrorHandling.h"
17 #include <memory>
18 
19 namespace llvm {
20 namespace objcopy {
21 namespace macho {
22 
headerSize() const23 size_t MachOWriter::headerSize() const {
24   return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
25 }
26 
loadCommandsSize() const27 size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; }
28 
symTableSize() const29 size_t MachOWriter::symTableSize() const {
30   return O.SymTable.Symbols.size() *
31          (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist));
32 }
33 
totalSize() const34 size_t MachOWriter::totalSize() const {
35   // Going from tail to head and looking for an appropriate "anchor" to
36   // calculate the total size assuming that all the offsets are either valid
37   // ("true") or 0 (0 indicates that the corresponding part is missing).
38 
39   SmallVector<size_t, 7> Ends;
40   if (O.SymTabCommandIndex) {
41     const MachO::symtab_command &SymTabCommand =
42         O.LoadCommands[*O.SymTabCommandIndex]
43             .MachOLoadCommand.symtab_command_data;
44     if (SymTabCommand.symoff)
45       Ends.push_back(SymTabCommand.symoff + symTableSize());
46     if (SymTabCommand.stroff)
47       Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
48   }
49   if (O.DyLdInfoCommandIndex) {
50     const MachO::dyld_info_command &DyLdInfoCommand =
51         O.LoadCommands[*O.DyLdInfoCommandIndex]
52             .MachOLoadCommand.dyld_info_command_data;
53     if (DyLdInfoCommand.rebase_off) {
54       assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
55              "Incorrect rebase opcodes size");
56       Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size);
57     }
58     if (DyLdInfoCommand.bind_off) {
59       assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
60              "Incorrect bind opcodes size");
61       Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size);
62     }
63     if (DyLdInfoCommand.weak_bind_off) {
64       assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
65              "Incorrect weak bind opcodes size");
66       Ends.push_back(DyLdInfoCommand.weak_bind_off +
67                      DyLdInfoCommand.weak_bind_size);
68     }
69     if (DyLdInfoCommand.lazy_bind_off) {
70       assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
71              "Incorrect lazy bind opcodes size");
72       Ends.push_back(DyLdInfoCommand.lazy_bind_off +
73                      DyLdInfoCommand.lazy_bind_size);
74     }
75     if (DyLdInfoCommand.export_off) {
76       assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
77              "Incorrect trie size");
78       Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size);
79     }
80   }
81 
82   if (O.DySymTabCommandIndex) {
83     const MachO::dysymtab_command &DySymTabCommand =
84         O.LoadCommands[*O.DySymTabCommandIndex]
85             .MachOLoadCommand.dysymtab_command_data;
86 
87     if (DySymTabCommand.indirectsymoff)
88       Ends.push_back(DySymTabCommand.indirectsymoff +
89                      sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
90   }
91 
92   if (O.DataInCodeCommandIndex) {
93     const MachO::linkedit_data_command &LinkEditDataCommand =
94         O.LoadCommands[*O.DataInCodeCommandIndex]
95             .MachOLoadCommand.linkedit_data_command_data;
96 
97     if (LinkEditDataCommand.dataoff)
98       Ends.push_back(LinkEditDataCommand.dataoff +
99                      LinkEditDataCommand.datasize);
100   }
101 
102   if (O.FunctionStartsCommandIndex) {
103     const MachO::linkedit_data_command &LinkEditDataCommand =
104         O.LoadCommands[*O.FunctionStartsCommandIndex]
105             .MachOLoadCommand.linkedit_data_command_data;
106 
107     if (LinkEditDataCommand.dataoff)
108       Ends.push_back(LinkEditDataCommand.dataoff +
109                      LinkEditDataCommand.datasize);
110   }
111 
112   // Otherwise, use the last section / reloction.
113   for (const auto &LC : O.LoadCommands)
114     for (const auto &S : LC.Sections) {
115       Ends.push_back(S.Offset + S.Size);
116       if (S.RelOff)
117         Ends.push_back(S.RelOff +
118                        S.NReloc * sizeof(MachO::any_relocation_info));
119     }
120 
121   if (!Ends.empty())
122     return *std::max_element(Ends.begin(), Ends.end());
123 
124   // Otherwise, we have only Mach header and load commands.
125   return headerSize() + loadCommandsSize();
126 }
127 
writeHeader()128 void MachOWriter::writeHeader() {
129   MachO::mach_header_64 Header;
130 
131   Header.magic = O.Header.Magic;
132   Header.cputype = O.Header.CPUType;
133   Header.cpusubtype = O.Header.CPUSubType;
134   Header.filetype = O.Header.FileType;
135   Header.ncmds = O.Header.NCmds;
136   Header.sizeofcmds = O.Header.SizeOfCmds;
137   Header.flags = O.Header.Flags;
138   Header.reserved = O.Header.Reserved;
139 
140   if (IsLittleEndian != sys::IsLittleEndianHost)
141     MachO::swapStruct(Header);
142 
143   auto HeaderSize =
144       Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
145   memcpy(B.getBufferStart(), &Header, HeaderSize);
146 }
147 
// Serializes every load command of the object, back to back, into the output
// buffer starting right after the Mach header (at offset headerSize()).
//
// Segment commands are handled first because they are followed by their
// section headers. Every other command is emitted as its fixed-size struct
// followed by the raw payload bytes stored in LC.Payload.
void MachOWriter::writeLoadCommands() {
  // Write cursor; advanced past each command (and section header) emitted.
  uint8_t *Begin = B.getBufferStart() + headerSize();
  for (const auto &LC : O.LoadCommands) {
    // Construct a load command.
    // MLC is a local copy, so byte swapping below does not modify the object.
    MachO::macho_load_command MLC = LC.MachOLoadCommand;
    switch (MLC.load_command_data.cmd) {
    case MachO::LC_SEGMENT:
      if (IsLittleEndian != sys::IsLittleEndianHost)
        MachO::swapStruct(MLC.segment_command_data);
      memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
      Begin += sizeof(MachO::segment_command);

      // The 32-bit section headers immediately follow the segment command.
      for (const auto &Sec : LC.Sections)
        writeSectionInLoadCommand<MachO::section>(Sec, Begin);
      // Segment fully written; skip the generic handling below.
      continue;
    case MachO::LC_SEGMENT_64:
      if (IsLittleEndian != sys::IsLittleEndianHost)
        MachO::swapStruct(MLC.segment_command_64_data);
      memcpy(Begin, &MLC.segment_command_64_data,
             sizeof(MachO::segment_command_64));
      Begin += sizeof(MachO::segment_command_64);

      // The 64-bit section headers immediately follow the segment command.
      for (const auto &Sec : LC.Sections)
        writeSectionInLoadCommand<MachO::section_64>(Sec, Begin);
      // Segment fully written; skip the generic handling below.
      continue;
    }

// Expands to one `case` of the switch below: checks that the struct size plus
// the payload size matches cmdsize, byte-swaps the struct if needed, then
// copies the struct followed by the payload and advances the write cursor.
// NOTE(review): presumably MachO.def invokes this macro once per known load
// command kind and #undefs it afterwards -- confirm against MachO.def.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName:                                                          \
    assert(sizeof(MachO::LCStruct) + LC.Payload.size() ==                      \
           MLC.load_command_data.cmdsize);                                     \
    if (IsLittleEndian != sys::IsLittleEndianHost)                             \
      MachO::swapStruct(MLC.LCStruct##_data);                                  \
    memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct));              \
    Begin += sizeof(MachO::LCStruct);                                          \
    if (!LC.Payload.empty())                                                   \
      memcpy(Begin, LC.Payload.data(), LC.Payload.size());                     \
    Begin += LC.Payload.size();                                                \
    break;

    // Copy the load command as it is.
    switch (MLC.load_command_data.cmd) {
    default:
      // Command without a dedicated case: emit the generic load_command
      // header followed by the payload.
      assert(sizeof(MachO::load_command) + LC.Payload.size() ==
             MLC.load_command_data.cmdsize);
      if (IsLittleEndian != sys::IsLittleEndianHost)
        MachO::swapStruct(MLC.load_command_data);
      memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
      Begin += sizeof(MachO::load_command);
      if (!LC.Payload.empty())
        memcpy(Begin, LC.Payload.data(), LC.Payload.size());
      Begin += LC.Payload.size();
      break;
// The remaining cases come from the HANDLE_LOAD_COMMAND expansions.
#include "llvm/BinaryFormat/MachO.def"
    }
  }
}
205 
206 template <typename StructType>
writeSectionInLoadCommand(const Section & Sec,uint8_t * & Out)207 void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
208   StructType Temp;
209   assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name");
210   assert(Sec.Sectname.size() <= sizeof(Temp.sectname) &&
211          "too long section name");
212   memset(&Temp, 0, sizeof(StructType));
213   memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size());
214   memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size());
215   Temp.addr = Sec.Addr;
216   Temp.size = Sec.Size;
217   Temp.offset = Sec.Offset;
218   Temp.align = Sec.Align;
219   Temp.reloff = Sec.RelOff;
220   Temp.nreloc = Sec.NReloc;
221   Temp.flags = Sec.Flags;
222   Temp.reserved1 = Sec.Reserved1;
223   Temp.reserved2 = Sec.Reserved2;
224 
225   if (IsLittleEndian != sys::IsLittleEndianHost)
226     MachO::swapStruct(Temp);
227   memcpy(Out, &Temp, sizeof(StructType));
228   Out += sizeof(StructType);
229 }
230 
writeSections()231 void MachOWriter::writeSections() {
232   for (const auto &LC : O.LoadCommands)
233     for (const auto &Sec : LC.Sections) {
234       if (Sec.isVirtualSection())
235         continue;
236 
237       assert(Sec.Offset && "Section offset can not be zero");
238       assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
239       memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
240              Sec.Content.size());
241       for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) {
242         auto RelocInfo = Sec.Relocations[Index];
243         if (!RelocInfo.Scattered) {
244           auto *Info =
245               reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info);
246           Info->r_symbolnum = RelocInfo.Symbol->Index;
247         }
248 
249         if (IsLittleEndian != sys::IsLittleEndianHost)
250           MachO::swapStruct(
251               reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info));
252         memcpy(B.getBufferStart() + Sec.RelOff +
253                    Index * sizeof(MachO::any_relocation_info),
254                &RelocInfo.Info, sizeof(RelocInfo.Info));
255       }
256     }
257 }
258 
259 template <typename NListType>
writeNListEntry(const SymbolEntry & SE,bool IsLittleEndian,char * & Out,uint32_t Nstrx)260 void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out,
261                      uint32_t Nstrx) {
262   NListType ListEntry;
263   ListEntry.n_strx = Nstrx;
264   ListEntry.n_type = SE.n_type;
265   ListEntry.n_sect = SE.n_sect;
266   ListEntry.n_desc = SE.n_desc;
267   ListEntry.n_value = SE.n_value;
268 
269   if (IsLittleEndian != sys::IsLittleEndianHost)
270     MachO::swapStruct(ListEntry);
271   memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType));
272   Out += sizeof(NListType);
273 }
274 
writeStringTable()275 void MachOWriter::writeStringTable() {
276   if (!O.SymTabCommandIndex)
277     return;
278   const MachO::symtab_command &SymTabCommand =
279       O.LoadCommands[*O.SymTabCommandIndex]
280           .MachOLoadCommand.symtab_command_data;
281 
282   uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
283   LayoutBuilder.getStringTableBuilder().write(StrTable);
284 }
285 
writeSymbolTable()286 void MachOWriter::writeSymbolTable() {
287   if (!O.SymTabCommandIndex)
288     return;
289   const MachO::symtab_command &SymTabCommand =
290       O.LoadCommands[*O.SymTabCommandIndex]
291           .MachOLoadCommand.symtab_command_data;
292 
293   char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff;
294   for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
295        Iter != End; Iter++) {
296     SymbolEntry *Sym = Iter->get();
297     uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
298 
299     if (Is64Bit)
300       writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
301     else
302       writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx);
303   }
304 }
305 
writeRebaseInfo()306 void MachOWriter::writeRebaseInfo() {
307   if (!O.DyLdInfoCommandIndex)
308     return;
309   const MachO::dyld_info_command &DyLdInfoCommand =
310       O.LoadCommands[*O.DyLdInfoCommandIndex]
311           .MachOLoadCommand.dyld_info_command_data;
312   char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.rebase_off;
313   assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
314          "Incorrect rebase opcodes size");
315   memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size());
316 }
317 
writeBindInfo()318 void MachOWriter::writeBindInfo() {
319   if (!O.DyLdInfoCommandIndex)
320     return;
321   const MachO::dyld_info_command &DyLdInfoCommand =
322       O.LoadCommands[*O.DyLdInfoCommandIndex]
323           .MachOLoadCommand.dyld_info_command_data;
324   char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.bind_off;
325   assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
326          "Incorrect bind opcodes size");
327   memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size());
328 }
329 
writeWeakBindInfo()330 void MachOWriter::writeWeakBindInfo() {
331   if (!O.DyLdInfoCommandIndex)
332     return;
333   const MachO::dyld_info_command &DyLdInfoCommand =
334       O.LoadCommands[*O.DyLdInfoCommandIndex]
335           .MachOLoadCommand.dyld_info_command_data;
336   char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.weak_bind_off;
337   assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
338          "Incorrect weak bind opcodes size");
339   memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size());
340 }
341 
writeLazyBindInfo()342 void MachOWriter::writeLazyBindInfo() {
343   if (!O.DyLdInfoCommandIndex)
344     return;
345   const MachO::dyld_info_command &DyLdInfoCommand =
346       O.LoadCommands[*O.DyLdInfoCommandIndex]
347           .MachOLoadCommand.dyld_info_command_data;
348   char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.lazy_bind_off;
349   assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
350          "Incorrect lazy bind opcodes size");
351   memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size());
352 }
353 
writeExportInfo()354 void MachOWriter::writeExportInfo() {
355   if (!O.DyLdInfoCommandIndex)
356     return;
357   const MachO::dyld_info_command &DyLdInfoCommand =
358       O.LoadCommands[*O.DyLdInfoCommandIndex]
359           .MachOLoadCommand.dyld_info_command_data;
360   char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.export_off;
361   assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
362          "Incorrect export trie size");
363   memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
364 }
365 
writeIndirectSymbolTable()366 void MachOWriter::writeIndirectSymbolTable() {
367   if (!O.DySymTabCommandIndex)
368     return;
369 
370   const MachO::dysymtab_command &DySymTabCommand =
371       O.LoadCommands[*O.DySymTabCommandIndex]
372           .MachOLoadCommand.dysymtab_command_data;
373 
374   uint32_t *Out =
375       (uint32_t *)(B.getBufferStart() + DySymTabCommand.indirectsymoff);
376   for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) {
377     uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex;
378     if (IsLittleEndian != sys::IsLittleEndianHost)
379       sys::swapByteOrder(Entry);
380     *Out++ = Entry;
381   }
382 }
383 
writeDataInCodeData()384 void MachOWriter::writeDataInCodeData() {
385   if (!O.DataInCodeCommandIndex)
386     return;
387   const MachO::linkedit_data_command &LinkEditDataCommand =
388       O.LoadCommands[*O.DataInCodeCommandIndex]
389           .MachOLoadCommand.linkedit_data_command_data;
390   char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
391   assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
392          "Incorrect data in code data size");
393   memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size());
394 }
395 
writeFunctionStartsData()396 void MachOWriter::writeFunctionStartsData() {
397   if (!O.FunctionStartsCommandIndex)
398     return;
399   const MachO::linkedit_data_command &LinkEditDataCommand =
400       O.LoadCommands[*O.FunctionStartsCommandIndex]
401           .MachOLoadCommand.linkedit_data_command_data;
402   char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
403   assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
404          "Incorrect function starts data size");
405   memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
406 }
407 
writeTail()408 void MachOWriter::writeTail() {
409   typedef void (MachOWriter::*WriteHandlerType)(void);
410   typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
411   SmallVector<WriteOperation, 7> Queue;
412 
413   if (O.SymTabCommandIndex) {
414     const MachO::symtab_command &SymTabCommand =
415         O.LoadCommands[*O.SymTabCommandIndex]
416             .MachOLoadCommand.symtab_command_data;
417     if (SymTabCommand.symoff)
418       Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable});
419     if (SymTabCommand.stroff)
420       Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable});
421   }
422 
423   if (O.DyLdInfoCommandIndex) {
424     const MachO::dyld_info_command &DyLdInfoCommand =
425         O.LoadCommands[*O.DyLdInfoCommandIndex]
426             .MachOLoadCommand.dyld_info_command_data;
427     if (DyLdInfoCommand.rebase_off)
428       Queue.push_back(
429           {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo});
430     if (DyLdInfoCommand.bind_off)
431       Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo});
432     if (DyLdInfoCommand.weak_bind_off)
433       Queue.push_back(
434           {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo});
435     if (DyLdInfoCommand.lazy_bind_off)
436       Queue.push_back(
437           {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo});
438     if (DyLdInfoCommand.export_off)
439       Queue.push_back(
440           {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
441   }
442 
443   if (O.DySymTabCommandIndex) {
444     const MachO::dysymtab_command &DySymTabCommand =
445         O.LoadCommands[*O.DySymTabCommandIndex]
446             .MachOLoadCommand.dysymtab_command_data;
447 
448     if (DySymTabCommand.indirectsymoff)
449       Queue.emplace_back(DySymTabCommand.indirectsymoff,
450                          &MachOWriter::writeIndirectSymbolTable);
451   }
452 
453   if (O.DataInCodeCommandIndex) {
454     const MachO::linkedit_data_command &LinkEditDataCommand =
455         O.LoadCommands[*O.DataInCodeCommandIndex]
456             .MachOLoadCommand.linkedit_data_command_data;
457 
458     if (LinkEditDataCommand.dataoff)
459       Queue.emplace_back(LinkEditDataCommand.dataoff,
460                          &MachOWriter::writeDataInCodeData);
461   }
462 
463   if (O.FunctionStartsCommandIndex) {
464     const MachO::linkedit_data_command &LinkEditDataCommand =
465         O.LoadCommands[*O.FunctionStartsCommandIndex]
466             .MachOLoadCommand.linkedit_data_command_data;
467 
468     if (LinkEditDataCommand.dataoff)
469       Queue.emplace_back(LinkEditDataCommand.dataoff,
470                          &MachOWriter::writeFunctionStartsData);
471   }
472 
473   llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
474     return LHS.first < RHS.first;
475   });
476 
477   for (auto WriteOp : Queue)
478     (this->*WriteOp.second)();
479 }
480 
finalize()481 Error MachOWriter::finalize() { return LayoutBuilder.layout(); }
482 
write()483 Error MachOWriter::write() {
484   if (Error E = B.allocate(totalSize()))
485     return E;
486   memset(B.getBufferStart(), 0, totalSize());
487   writeHeader();
488   writeLoadCommands();
489   writeSections();
490   writeTail();
491   return B.commit();
492 }
493 
494 } // end namespace macho
495 } // end namespace objcopy
496 } // end namespace llvm
497