1 //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOLayoutBuilder.h"
10 #include "llvm/Support/Alignment.h"
11 #include "llvm/Support/Errc.h"
12 #include "llvm/Support/ErrorHandling.h"
13 
14 namespace llvm {
15 namespace objcopy {
16 namespace macho {
17 
18 StringTableBuilder::Kind
getStringTableBuilderKind(const Object & O,bool Is64Bit)19 MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
20   if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
21     return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO;
22   return Is64Bit ? StringTableBuilder::MachO64Linked
23                  : StringTableBuilder::MachOLinked;
24 }
25 
computeSizeOfCmds() const26 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
27   uint32_t Size = 0;
28   for (const LoadCommand &LC : O.LoadCommands) {
29     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
30     auto cmd = MLC.load_command_data.cmd;
31     switch (cmd) {
32     case MachO::LC_SEGMENT:
33       Size += sizeof(MachO::segment_command) +
34               sizeof(MachO::section) * LC.Sections.size();
35       continue;
36     case MachO::LC_SEGMENT_64:
37       Size += sizeof(MachO::segment_command_64) +
38               sizeof(MachO::section_64) * LC.Sections.size();
39       continue;
40     }
41 
42     switch (cmd) {
43 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
44   case MachO::LCName:                                                          \
45     Size += sizeof(MachO::LCStruct) + LC.Payload.size();                       \
46     break;
47 #include "llvm/BinaryFormat/MachO.def"
48 #undef HANDLE_LOAD_COMMAND
49     }
50   }
51 
52   return Size;
53 }
54 
constructStringTable()55 void MachOLayoutBuilder::constructStringTable() {
56   for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
57     StrTableBuilder.add(Sym->Name);
58   StrTableBuilder.finalize();
59 }
60 
updateSymbolIndexes()61 void MachOLayoutBuilder::updateSymbolIndexes() {
62   uint32_t Index = 0;
63   for (auto &Symbol : O.SymTable.Symbols)
64     Symbol->Index = Index++;
65 }
66 
67 // Updates the index and the number of local/external/undefined symbols.
updateDySymTab(MachO::macho_load_command & MLC)68 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
69   assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
70   // Make sure that nlist entries in the symbol table are sorted by the those
71   // types. The order is: local < defined external < undefined external.
72   assert(llvm::is_sorted(O.SymTable.Symbols,
73                          [](const std::unique_ptr<SymbolEntry> &A,
74                             const std::unique_ptr<SymbolEntry> &B) {
75                            bool AL = A->isLocalSymbol(),
76                                 BL = B->isLocalSymbol();
77                            if (AL != BL)
78                              return AL;
79                            return !AL && !A->isUndefinedSymbol() &&
80                                   B->isUndefinedSymbol();
81                          }) &&
82          "Symbols are not sorted by their types.");
83 
84   uint32_t NumLocalSymbols = 0;
85   auto Iter = O.SymTable.Symbols.begin();
86   auto End = O.SymTable.Symbols.end();
87   for (; Iter != End; ++Iter) {
88     if ((*Iter)->isExternalSymbol())
89       break;
90 
91     ++NumLocalSymbols;
92   }
93 
94   uint32_t NumExtDefSymbols = 0;
95   for (; Iter != End; ++Iter) {
96     if ((*Iter)->isUndefinedSymbol())
97       break;
98 
99     ++NumExtDefSymbols;
100   }
101 
102   MLC.dysymtab_command_data.ilocalsym = 0;
103   MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
104   MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
105   MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
106   MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
107   MLC.dysymtab_command_data.nundefsym =
108       O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
109 }
110 
111 // Recomputes and updates offset and size fields in load commands and sections
112 // since they could be modified.
layoutSegments()113 uint64_t MachOLayoutBuilder::layoutSegments() {
114   auto HeaderSize =
115       Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
116   const bool IsObjectFile =
117       O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
118   uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
119   for (LoadCommand &LC : O.LoadCommands) {
120     auto &MLC = LC.MachOLoadCommand;
121     StringRef Segname;
122     uint64_t SegmentVmAddr;
123     uint64_t SegmentVmSize;
124     switch (MLC.load_command_data.cmd) {
125     case MachO::LC_SEGMENT:
126       SegmentVmAddr = MLC.segment_command_data.vmaddr;
127       SegmentVmSize = MLC.segment_command_data.vmsize;
128       Segname = StringRef(MLC.segment_command_data.segname,
129                           strnlen(MLC.segment_command_data.segname,
130                                   sizeof(MLC.segment_command_data.segname)));
131       break;
132     case MachO::LC_SEGMENT_64:
133       SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
134       SegmentVmSize = MLC.segment_command_64_data.vmsize;
135       Segname = StringRef(MLC.segment_command_64_data.segname,
136                           strnlen(MLC.segment_command_64_data.segname,
137                                   sizeof(MLC.segment_command_64_data.segname)));
138       break;
139     default:
140       continue;
141     }
142 
143     if (Segname == "__LINKEDIT") {
144       // We update the __LINKEDIT segment later (in layoutTail).
145       assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
146       LinkEditLoadCommand = &MLC;
147       continue;
148     }
149 
150     // Update file offsets and sizes of sections.
151     uint64_t SegOffset = Offset;
152     uint64_t SegFileSize = 0;
153     uint64_t VMSize = 0;
154     for (std::unique_ptr<Section> &Sec : LC.Sections) {
155       assert(SegmentVmAddr <= Sec->Addr &&
156              "Section's address cannot be smaller than Segment's one");
157       uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
158       if (IsObjectFile) {
159         if (!Sec->hasValidOffset()) {
160           Sec->Offset = 0;
161         } else {
162           uint64_t PaddingSize =
163               offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
164           Sec->Offset = SegOffset + SegFileSize + PaddingSize;
165           Sec->Size = Sec->Content.size();
166           SegFileSize += PaddingSize + Sec->Size;
167         }
168       } else {
169         if (!Sec->hasValidOffset()) {
170           Sec->Offset = 0;
171         } else {
172           Sec->Offset = SegOffset + SectOffset;
173           Sec->Size = Sec->Content.size();
174           SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
175         }
176       }
177       VMSize = std::max(VMSize, SectOffset + Sec->Size);
178     }
179 
180     if (IsObjectFile) {
181       Offset += SegFileSize;
182     } else {
183       Offset = alignTo(Offset + SegFileSize, PageSize);
184       SegFileSize = alignTo(SegFileSize, PageSize);
185       // Use the original vmsize if the segment is __PAGEZERO.
186       VMSize =
187           Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
188     }
189 
190     switch (MLC.load_command_data.cmd) {
191     case MachO::LC_SEGMENT:
192       MLC.segment_command_data.cmdsize =
193           sizeof(MachO::segment_command) +
194           sizeof(MachO::section) * LC.Sections.size();
195       MLC.segment_command_data.nsects = LC.Sections.size();
196       MLC.segment_command_data.fileoff = SegOffset;
197       MLC.segment_command_data.vmsize = VMSize;
198       MLC.segment_command_data.filesize = SegFileSize;
199       break;
200     case MachO::LC_SEGMENT_64:
201       MLC.segment_command_64_data.cmdsize =
202           sizeof(MachO::segment_command_64) +
203           sizeof(MachO::section_64) * LC.Sections.size();
204       MLC.segment_command_64_data.nsects = LC.Sections.size();
205       MLC.segment_command_64_data.fileoff = SegOffset;
206       MLC.segment_command_64_data.vmsize = VMSize;
207       MLC.segment_command_64_data.filesize = SegFileSize;
208       break;
209     }
210   }
211 
212   return Offset;
213 }
214 
layoutRelocations(uint64_t Offset)215 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
216   for (LoadCommand &LC : O.LoadCommands)
217     for (std::unique_ptr<Section> &Sec : LC.Sections) {
218       Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
219       Sec->NReloc = Sec->Relocations.size();
220       Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
221     }
222 
223   return Offset;
224 }
225 
layoutTail(uint64_t Offset)226 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
227   // If we are building the layout of an executable or dynamic library
228   // which does not have any segments other than __LINKEDIT,
229   // the Offset can be equal to zero by this time. It happens because of the
230   // convention that in such cases the file offsets specified by LC_SEGMENT
231   // start with zero (unlike the case of a relocatable object file).
232   const uint64_t HeaderSize =
233       Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
234   assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
235           Offset >= HeaderSize + O.Header.SizeOfCmds) &&
236          "Incorrect tail offset");
237   Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
238 
239   // The order of LINKEDIT elements is as follows:
240   // rebase info, binding info, weak binding info, lazy binding info, export
241   // trie, data-in-code, symbol table, indirect symbol table, symbol table
242   // strings, code signature.
243   uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
244   uint64_t StartOfLinkEdit = Offset;
245   uint64_t StartOfRebaseInfo = StartOfLinkEdit;
246   uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size();
247   uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size();
248   uint64_t StartOfLazyBindingInfo =
249       StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size();
250   uint64_t StartOfExportTrie =
251       StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
252   uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
253   uint64_t StartOfDataInCode =
254       StartOfFunctionStarts + O.FunctionStarts.Data.size();
255   uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size();
256   uint64_t StartOfIndirectSymbols =
257       StartOfSymbols + NListSize * O.SymTable.Symbols.size();
258   uint64_t StartOfSymbolStrings =
259       StartOfIndirectSymbols +
260       sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
261   uint64_t StartOfCodeSignature =
262       StartOfSymbolStrings + StrTableBuilder.getSize();
263   if (O.CodeSignatureCommandIndex)
264     StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
265   uint64_t LinkEditSize =
266       (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit;
267 
268   // Now we have determined the layout of the contents of the __LINKEDIT
269   // segment. Update its load command.
270   if (LinkEditLoadCommand) {
271     MachO::macho_load_command *MLC = LinkEditLoadCommand;
272     switch (LinkEditLoadCommand->load_command_data.cmd) {
273     case MachO::LC_SEGMENT:
274       MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
275       MLC->segment_command_data.fileoff = StartOfLinkEdit;
276       MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
277       MLC->segment_command_data.filesize = LinkEditSize;
278       break;
279     case MachO::LC_SEGMENT_64:
280       MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
281       MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
282       MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
283       MLC->segment_command_64_data.filesize = LinkEditSize;
284       break;
285     }
286   }
287 
288   for (LoadCommand &LC : O.LoadCommands) {
289     auto &MLC = LC.MachOLoadCommand;
290     auto cmd = MLC.load_command_data.cmd;
291     switch (cmd) {
292     case MachO::LC_CODE_SIGNATURE:
293       MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
294       MLC.linkedit_data_command_data.datasize = O.CodeSignature.Data.size();
295       break;
296     case MachO::LC_SYMTAB:
297       MLC.symtab_command_data.symoff = StartOfSymbols;
298       MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
299       MLC.symtab_command_data.stroff = StartOfSymbolStrings;
300       MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
301       break;
302     case MachO::LC_DYSYMTAB: {
303       if (MLC.dysymtab_command_data.ntoc != 0 ||
304           MLC.dysymtab_command_data.nmodtab != 0 ||
305           MLC.dysymtab_command_data.nextrefsyms != 0 ||
306           MLC.dysymtab_command_data.nlocrel != 0 ||
307           MLC.dysymtab_command_data.nextrel != 0)
308         return createStringError(llvm::errc::not_supported,
309                                  "shared library is not yet supported");
310 
311       if (!O.IndirectSymTable.Symbols.empty()) {
312         MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
313         MLC.dysymtab_command_data.nindirectsyms =
314             O.IndirectSymTable.Symbols.size();
315       }
316 
317       updateDySymTab(MLC);
318       break;
319     }
320     case MachO::LC_DATA_IN_CODE:
321       MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
322       MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
323       break;
324     case MachO::LC_FUNCTION_STARTS:
325       MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
326       MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
327       break;
328     case MachO::LC_DYLD_INFO:
329     case MachO::LC_DYLD_INFO_ONLY:
330       MLC.dyld_info_command_data.rebase_off =
331           O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
332       MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
333       MLC.dyld_info_command_data.bind_off =
334           O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
335       MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
336       MLC.dyld_info_command_data.weak_bind_off =
337           O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
338       MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
339       MLC.dyld_info_command_data.lazy_bind_off =
340           O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
341       MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
342       MLC.dyld_info_command_data.export_off =
343           O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
344       MLC.dyld_info_command_data.export_size = O.Exports.Trie.size();
345       break;
346     // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
347     // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
348     // relative virtual address. At the moment modification of the __TEXT
349     // segment of executables isn't supported anyway (e.g. data in code entries
350     // are not recalculated). Moreover, in general
351     // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
352     // without making additional assumptions (e.g. that the entire __TEXT
353     // segment should be encrypted) we do not know how to recalculate the
354     // boundaries of the encrypted part. For now just copy over these load
355     // commands until we encounter a real world usecase where
356     // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
357     case MachO::LC_ENCRYPTION_INFO:
358     case MachO::LC_ENCRYPTION_INFO_64:
359     case MachO::LC_LOAD_DYLINKER:
360     case MachO::LC_MAIN:
361     case MachO::LC_RPATH:
362     case MachO::LC_SEGMENT:
363     case MachO::LC_SEGMENT_64:
364     case MachO::LC_VERSION_MIN_MACOSX:
365     case MachO::LC_VERSION_MIN_IPHONEOS:
366     case MachO::LC_VERSION_MIN_TVOS:
367     case MachO::LC_VERSION_MIN_WATCHOS:
368     case MachO::LC_BUILD_VERSION:
369     case MachO::LC_ID_DYLIB:
370     case MachO::LC_LOAD_DYLIB:
371     case MachO::LC_LOAD_WEAK_DYLIB:
372     case MachO::LC_UUID:
373     case MachO::LC_SOURCE_VERSION:
374       // Nothing to update.
375       break;
376     default:
377       // Abort if it's unsupported in order to prevent corrupting the object.
378       return createStringError(llvm::errc::not_supported,
379                                "unsupported load command (cmd=0x%x)", cmd);
380     }
381   }
382 
383   return Error::success();
384 }
385 
layout()386 Error MachOLayoutBuilder::layout() {
387   O.Header.NCmds = O.LoadCommands.size();
388   O.Header.SizeOfCmds = computeSizeOfCmds();
389   constructStringTable();
390   updateSymbolIndexes();
391   uint64_t Offset = layoutSegments();
392   Offset = layoutRelocations(Offset);
393   return layoutTail(Offset);
394 }
395 
396 } // end namespace macho
397 } // end namespace objcopy
398 } // end namespace llvm
399