1 //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOLayoutBuilder.h"
10 #include "llvm/Support/Alignment.h"
11 #include "llvm/Support/Errc.h"
12 #include "llvm/Support/ErrorHandling.h"
13 
14 using namespace llvm;
15 using namespace llvm::objcopy::macho;
16 
17 StringTableBuilder::Kind
18 MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
19   if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
20     return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO;
21   return Is64Bit ? StringTableBuilder::MachO64Linked
22                  : StringTableBuilder::MachOLinked;
23 }
24 
25 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
26   uint32_t Size = 0;
27   for (const LoadCommand &LC : O.LoadCommands) {
28     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
29     auto cmd = MLC.load_command_data.cmd;
30     switch (cmd) {
31     case MachO::LC_SEGMENT:
32       Size += sizeof(MachO::segment_command) +
33               sizeof(MachO::section) * LC.Sections.size();
34       continue;
35     case MachO::LC_SEGMENT_64:
36       Size += sizeof(MachO::segment_command_64) +
37               sizeof(MachO::section_64) * LC.Sections.size();
38       continue;
39     }
40 
41     switch (cmd) {
42 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
43   case MachO::LCName:                                                          \
44     Size += sizeof(MachO::LCStruct) + LC.Payload.size();                       \
45     break;
46 #include "llvm/BinaryFormat/MachO.def"
47 #undef HANDLE_LOAD_COMMAND
48     }
49   }
50 
51   return Size;
52 }
53 
54 void MachOLayoutBuilder::constructStringTable() {
55   for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
56     StrTableBuilder.add(Sym->Name);
57   StrTableBuilder.finalize();
58 }
59 
60 void MachOLayoutBuilder::updateSymbolIndexes() {
61   uint32_t Index = 0;
62   for (auto &Symbol : O.SymTable.Symbols)
63     Symbol->Index = Index++;
64 }
65 
66 // Updates the index and the number of local/external/undefined symbols.
67 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
68   assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
69   // Make sure that nlist entries in the symbol table are sorted by the those
70   // types. The order is: local < defined external < undefined external.
71   assert(llvm::is_sorted(O.SymTable.Symbols,
72                          [](const std::unique_ptr<SymbolEntry> &A,
73                             const std::unique_ptr<SymbolEntry> &B) {
74                            bool AL = A->isLocalSymbol(),
75                                 BL = B->isLocalSymbol();
76                            if (AL != BL)
77                              return AL;
78                            return !AL && !A->isUndefinedSymbol() &&
79                                   B->isUndefinedSymbol();
80                          }) &&
81          "Symbols are not sorted by their types.");
82 
83   uint32_t NumLocalSymbols = 0;
84   auto Iter = O.SymTable.Symbols.begin();
85   auto End = O.SymTable.Symbols.end();
86   for (; Iter != End; ++Iter) {
87     if ((*Iter)->isExternalSymbol())
88       break;
89 
90     ++NumLocalSymbols;
91   }
92 
93   uint32_t NumExtDefSymbols = 0;
94   for (; Iter != End; ++Iter) {
95     if ((*Iter)->isUndefinedSymbol())
96       break;
97 
98     ++NumExtDefSymbols;
99   }
100 
101   MLC.dysymtab_command_data.ilocalsym = 0;
102   MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
103   MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
104   MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
105   MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
106   MLC.dysymtab_command_data.nundefsym =
107       O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
108 }
109 
110 // Recomputes and updates offset and size fields in load commands and sections
111 // since they could be modified.
112 uint64_t MachOLayoutBuilder::layoutSegments() {
113   auto HeaderSize =
114       Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
115   const bool IsObjectFile =
116       O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
117   uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
118   for (LoadCommand &LC : O.LoadCommands) {
119     auto &MLC = LC.MachOLoadCommand;
120     StringRef Segname;
121     uint64_t SegmentVmAddr;
122     uint64_t SegmentVmSize;
123     switch (MLC.load_command_data.cmd) {
124     case MachO::LC_SEGMENT:
125       SegmentVmAddr = MLC.segment_command_data.vmaddr;
126       SegmentVmSize = MLC.segment_command_data.vmsize;
127       Segname = StringRef(MLC.segment_command_data.segname,
128                           strnlen(MLC.segment_command_data.segname,
129                                   sizeof(MLC.segment_command_data.segname)));
130       break;
131     case MachO::LC_SEGMENT_64:
132       SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
133       SegmentVmSize = MLC.segment_command_64_data.vmsize;
134       Segname = StringRef(MLC.segment_command_64_data.segname,
135                           strnlen(MLC.segment_command_64_data.segname,
136                                   sizeof(MLC.segment_command_64_data.segname)));
137       break;
138     default:
139       continue;
140     }
141 
142     if (Segname == "__LINKEDIT") {
143       // We update the __LINKEDIT segment later (in layoutTail).
144       assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
145       LinkEditLoadCommand = &MLC;
146       continue;
147     }
148 
149     // Update file offsets and sizes of sections.
150     uint64_t SegOffset = Offset;
151     uint64_t SegFileSize = 0;
152     uint64_t VMSize = 0;
153     for (std::unique_ptr<Section> &Sec : LC.Sections) {
154       assert(SegmentVmAddr <= Sec->Addr &&
155              "Section's address cannot be smaller than Segment's one");
156       uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
157       if (IsObjectFile) {
158         if (!Sec->hasValidOffset()) {
159           Sec->Offset = 0;
160         } else {
161           uint64_t PaddingSize =
162               offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
163           Sec->Offset = SegOffset + SegFileSize + PaddingSize;
164           Sec->Size = Sec->Content.size();
165           SegFileSize += PaddingSize + Sec->Size;
166         }
167       } else {
168         if (!Sec->hasValidOffset()) {
169           Sec->Offset = 0;
170         } else {
171           Sec->Offset = SegOffset + SectOffset;
172           Sec->Size = Sec->Content.size();
173           SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
174         }
175       }
176       VMSize = std::max(VMSize, SectOffset + Sec->Size);
177     }
178 
179     if (IsObjectFile) {
180       Offset += SegFileSize;
181     } else {
182       Offset = alignTo(Offset + SegFileSize, PageSize);
183       SegFileSize = alignTo(SegFileSize, PageSize);
184       // Use the original vmsize if the segment is __PAGEZERO.
185       VMSize =
186           Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
187     }
188 
189     switch (MLC.load_command_data.cmd) {
190     case MachO::LC_SEGMENT:
191       MLC.segment_command_data.cmdsize =
192           sizeof(MachO::segment_command) +
193           sizeof(MachO::section) * LC.Sections.size();
194       MLC.segment_command_data.nsects = LC.Sections.size();
195       MLC.segment_command_data.fileoff = SegOffset;
196       MLC.segment_command_data.vmsize = VMSize;
197       MLC.segment_command_data.filesize = SegFileSize;
198       break;
199     case MachO::LC_SEGMENT_64:
200       MLC.segment_command_64_data.cmdsize =
201           sizeof(MachO::segment_command_64) +
202           sizeof(MachO::section_64) * LC.Sections.size();
203       MLC.segment_command_64_data.nsects = LC.Sections.size();
204       MLC.segment_command_64_data.fileoff = SegOffset;
205       MLC.segment_command_64_data.vmsize = VMSize;
206       MLC.segment_command_64_data.filesize = SegFileSize;
207       break;
208     }
209   }
210 
211   return Offset;
212 }
213 
214 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
215   for (LoadCommand &LC : O.LoadCommands)
216     for (std::unique_ptr<Section> &Sec : LC.Sections) {
217       Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
218       Sec->NReloc = Sec->Relocations.size();
219       Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
220     }
221 
222   return Offset;
223 }
224 
225 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
226   // If we are building the layout of an executable or dynamic library
227   // which does not have any segments other than __LINKEDIT,
228   // the Offset can be equal to zero by this time. It happens because of the
229   // convention that in such cases the file offsets specified by LC_SEGMENT
230   // start with zero (unlike the case of a relocatable object file).
231   const uint64_t HeaderSize =
232       Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
233   assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
234           Offset >= HeaderSize + O.Header.SizeOfCmds) &&
235          "Incorrect tail offset");
236   Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
237 
238   // The exports trie can be in either LC_DYLD_INFO or in
239   // LC_DYLD_EXPORTS_TRIE, but not both.
240   size_t DyldInfoExportsTrieSize = 0;
241   size_t DyldExportsTrieSize = 0;
242   for (const auto &LC : O.LoadCommands) {
243     switch (LC.MachOLoadCommand.load_command_data.cmd) {
244     case MachO::LC_DYLD_INFO:
245     case MachO::LC_DYLD_INFO_ONLY:
246       DyldInfoExportsTrieSize = O.Exports.Trie.size();
247       break;
248     case MachO::LC_DYLD_EXPORTS_TRIE:
249       DyldExportsTrieSize = O.Exports.Trie.size();
250       break;
251     default:
252       break;
253     }
254   }
255   assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) &&
256          "Export trie in both LCs");
257 
258   uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
259   uint64_t StartOfLinkEdit = Offset;
260 
261   // The order of LINKEDIT elements is as follows:
262   // rebase info, binding info, weak binding info, lazy binding info, export
263   // trie, chained fixups, dyld exports trie, function starts, data-in-code,
264   // symbol table, indirect symbol table, symbol table strings,
265   // dylib codesign drs, and code signature.
266   auto updateOffset = [&Offset](size_t Size) {
267     uint64_t PreviousOffset = Offset;
268     Offset += Size;
269     return PreviousOffset;
270   };
271 
272   uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size());
273   uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size());
274   uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size());
275   uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size());
276   uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize);
277   uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size());
278   uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize);
279   uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size());
280   uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size());
281   uint64_t StartOfLinkerOptimizationHint =
282       updateOffset(O.LinkerOptimizationHint.Data.size());
283   uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size());
284   uint64_t StartOfIndirectSymbols =
285       updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
286   uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize());
287   uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size());
288 
289   uint64_t StartOfCodeSignature = Offset;
290   uint32_t CodeSignatureSize = 0;
291   if (O.CodeSignatureCommandIndex) {
292     StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
293 
294     // Note: These calculations are to be kept in sync with the same
295     // calculations performed in LLD's CodeSignatureSection.
296     const uint32_t AllHeadersSize =
297         alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
298                 CodeSignature.Align);
299     const uint32_t BlockCount =
300         (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
301         CodeSignature.BlockSize;
302     const uint32_t Size =
303         alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
304                 CodeSignature.Align);
305 
306     CodeSignature.StartOffset = StartOfCodeSignature;
307     CodeSignature.AllHeadersSize = AllHeadersSize;
308     CodeSignature.BlockCount = BlockCount;
309     CodeSignature.OutputFileName = OutputFileName;
310     CodeSignature.Size = Size;
311     CodeSignatureSize = Size;
312   }
313   uint64_t LinkEditSize =
314       StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;
315 
316   // Now we have determined the layout of the contents of the __LINKEDIT
317   // segment. Update its load command.
318   if (LinkEditLoadCommand) {
319     MachO::macho_load_command *MLC = LinkEditLoadCommand;
320     switch (LinkEditLoadCommand->load_command_data.cmd) {
321     case MachO::LC_SEGMENT:
322       MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
323       MLC->segment_command_data.fileoff = StartOfLinkEdit;
324       MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
325       MLC->segment_command_data.filesize = LinkEditSize;
326       break;
327     case MachO::LC_SEGMENT_64:
328       MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
329       MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
330       MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
331       MLC->segment_command_64_data.filesize = LinkEditSize;
332       break;
333     }
334   }
335 
336   for (LoadCommand &LC : O.LoadCommands) {
337     auto &MLC = LC.MachOLoadCommand;
338     auto cmd = MLC.load_command_data.cmd;
339     switch (cmd) {
340     case MachO::LC_CODE_SIGNATURE:
341       MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
342       MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
343       break;
344     case MachO::LC_DYLIB_CODE_SIGN_DRS:
345       MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs;
346       MLC.linkedit_data_command_data.datasize = O.DylibCodeSignDRs.Data.size();
347       break;
348     case MachO::LC_SYMTAB:
349       MLC.symtab_command_data.symoff = StartOfSymbols;
350       MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
351       MLC.symtab_command_data.stroff = StartOfSymbolStrings;
352       MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
353       break;
354     case MachO::LC_DYSYMTAB: {
355       if (MLC.dysymtab_command_data.ntoc != 0 ||
356           MLC.dysymtab_command_data.nmodtab != 0 ||
357           MLC.dysymtab_command_data.nextrefsyms != 0 ||
358           MLC.dysymtab_command_data.nlocrel != 0 ||
359           MLC.dysymtab_command_data.nextrel != 0)
360         return createStringError(llvm::errc::not_supported,
361                                  "shared library is not yet supported");
362 
363       if (!O.IndirectSymTable.Symbols.empty()) {
364         MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
365         MLC.dysymtab_command_data.nindirectsyms =
366             O.IndirectSymTable.Symbols.size();
367       }
368 
369       updateDySymTab(MLC);
370       break;
371     }
372     case MachO::LC_DATA_IN_CODE:
373       MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
374       MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
375       break;
376     case MachO::LC_LINKER_OPTIMIZATION_HINT:
377       MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint;
378       MLC.linkedit_data_command_data.datasize =
379           O.LinkerOptimizationHint.Data.size();
380       break;
381     case MachO::LC_FUNCTION_STARTS:
382       MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
383       MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
384       break;
385     case MachO::LC_DYLD_CHAINED_FIXUPS:
386       MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
387       MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
388       break;
389     case MachO::LC_DYLD_EXPORTS_TRIE:
390       MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
391       MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize;
392       break;
393     case MachO::LC_DYLD_INFO:
394     case MachO::LC_DYLD_INFO_ONLY:
395       MLC.dyld_info_command_data.rebase_off =
396           O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
397       MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
398       MLC.dyld_info_command_data.bind_off =
399           O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
400       MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
401       MLC.dyld_info_command_data.weak_bind_off =
402           O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
403       MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
404       MLC.dyld_info_command_data.lazy_bind_off =
405           O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
406       MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
407       MLC.dyld_info_command_data.export_off =
408           O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
409       MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize;
410       break;
411     // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
412     // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
413     // relative virtual address. At the moment modification of the __TEXT
414     // segment of executables isn't supported anyway (e.g. data in code entries
415     // are not recalculated). Moreover, in general
416     // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
417     // without making additional assumptions (e.g. that the entire __TEXT
418     // segment should be encrypted) we do not know how to recalculate the
419     // boundaries of the encrypted part. For now just copy over these load
420     // commands until we encounter a real world usecase where
421     // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
422     case MachO::LC_ENCRYPTION_INFO:
423     case MachO::LC_ENCRYPTION_INFO_64:
424     case MachO::LC_LOAD_DYLINKER:
425     case MachO::LC_MAIN:
426     case MachO::LC_RPATH:
427     case MachO::LC_SEGMENT:
428     case MachO::LC_SEGMENT_64:
429     case MachO::LC_VERSION_MIN_MACOSX:
430     case MachO::LC_VERSION_MIN_IPHONEOS:
431     case MachO::LC_VERSION_MIN_TVOS:
432     case MachO::LC_VERSION_MIN_WATCHOS:
433     case MachO::LC_BUILD_VERSION:
434     case MachO::LC_ID_DYLIB:
435     case MachO::LC_LOAD_DYLIB:
436     case MachO::LC_LOAD_WEAK_DYLIB:
437     case MachO::LC_UUID:
438     case MachO::LC_SOURCE_VERSION:
439     case MachO::LC_THREAD:
440     case MachO::LC_UNIXTHREAD:
441     case MachO::LC_SUB_FRAMEWORK:
442     case MachO::LC_SUB_UMBRELLA:
443     case MachO::LC_SUB_CLIENT:
444     case MachO::LC_SUB_LIBRARY:
445     case MachO::LC_LINKER_OPTION:
446       // Nothing to update.
447       break;
448     default:
449       // Abort if it's unsupported in order to prevent corrupting the object.
450       return createStringError(llvm::errc::not_supported,
451                                "unsupported load command (cmd=0x%x)", cmd);
452     }
453   }
454 
455   return Error::success();
456 }
457 
458 Error MachOLayoutBuilder::layout() {
459   O.Header.NCmds = O.LoadCommands.size();
460   O.Header.SizeOfCmds = computeSizeOfCmds();
461   constructStringTable();
462   updateSymbolIndexes();
463   uint64_t Offset = layoutSegments();
464   Offset = layoutRelocations(Offset);
465   return layoutTail(Offset);
466 }
467