1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOReader.h"
10 #include "MachOObject.h"
11 #include "llvm/BinaryFormat/MachO.h"
12 #include "llvm/Object/MachO.h"
13 #include "llvm/Support/Errc.h"
14 #include "llvm/Support/SystemZ/zOSSupport.h"
15 #include <memory>
16 
17 using namespace llvm;
18 using namespace llvm::objcopy;
19 using namespace llvm::objcopy::macho;
20 
readHeader(Object & O) const21 void MachOReader::readHeader(Object &O) const {
22   O.Header.Magic = MachOObj.getHeader().magic;
23   O.Header.CPUType = MachOObj.getHeader().cputype;
24   O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
25   O.Header.FileType = MachOObj.getHeader().filetype;
26   O.Header.NCmds = MachOObj.getHeader().ncmds;
27   O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
28   O.Header.Flags = MachOObj.getHeader().flags;
29 }
30 
31 template <typename SectionType>
constructSectionCommon(const SectionType & Sec,uint32_t Index)32 static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
33   StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
34   StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
35   Section S(SegName, SectName);
36   S.Index = Index;
37   S.Addr = Sec.addr;
38   S.Size = Sec.size;
39   S.OriginalOffset = Sec.offset;
40   S.Align = Sec.align;
41   S.RelOff = Sec.reloff;
42   S.NReloc = Sec.nreloc;
43   S.Flags = Sec.flags;
44   S.Reserved1 = Sec.reserved1;
45   S.Reserved2 = Sec.reserved2;
46   S.Reserved3 = 0;
47   return S;
48 }
49 
constructSection(const MachO::section & Sec,uint32_t Index)50 Section constructSection(const MachO::section &Sec, uint32_t Index) {
51   return constructSectionCommon(Sec, Index);
52 }
53 
constructSection(const MachO::section_64 & Sec,uint32_t Index)54 Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
55   Section S = constructSectionCommon(Sec, Index);
56   S.Reserved3 = Sec.reserved3;
57   return S;
58 }
59 
60 template <typename SectionType, typename SegmentType>
extractSections(const object::MachOObjectFile::LoadCommandInfo & LoadCmd,const object::MachOObjectFile & MachOObj,uint32_t & NextSectionIndex)61 Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
62     const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
63     const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
64   std::vector<std::unique_ptr<Section>> Sections;
65   for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
66                                                          sizeof(SegmentType)),
67             End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
68                                                         LoadCmd.C.cmdsize);
69        Curr < End; ++Curr) {
70     SectionType Sec;
71     memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr),
72            sizeof(SectionType));
73 
74     if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
75       MachO::swapStruct(Sec);
76 
77     Sections.push_back(
78         std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
79 
80     Section &S = *Sections.back();
81 
82     Expected<object::SectionRef> SecRef =
83         MachOObj.getSection(NextSectionIndex++);
84     if (!SecRef)
85       return SecRef.takeError();
86 
87     Expected<ArrayRef<uint8_t>> Data =
88         MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
89     if (!Data)
90       return Data.takeError();
91 
92     S.Content =
93         StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
94 
95     const uint32_t CPUType = MachOObj.getHeader().cputype;
96     S.Relocations.reserve(S.NReloc);
97     for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
98               RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
99          RI != RE; ++RI) {
100       RelocationInfo R;
101       R.Symbol = nullptr; // We'll fill this field later.
102       R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
103       R.Scattered = MachOObj.isRelocationScattered(R.Info);
104       unsigned Type = MachOObj.getAnyRelocationType(R.Info);
105       // TODO Support CPU_TYPE_ARM.
106       R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
107                                     Type == MachO::ARM64_RELOC_ADDEND);
108       R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);
109       S.Relocations.push_back(R);
110     }
111 
112     assert(S.NReloc == S.Relocations.size() &&
113            "Incorrect number of relocations");
114   }
115   return std::move(Sections);
116 }
117 
readLoadCommands(Object & O) const118 Error MachOReader::readLoadCommands(Object &O) const {
119   // For MachO sections indices start from 1.
120   uint32_t NextSectionIndex = 1;
121   static constexpr char TextSegmentName[] = "__TEXT";
122   for (auto LoadCmd : MachOObj.load_commands()) {
123     LoadCommand LC;
124     switch (LoadCmd.C.cmd) {
125     case MachO::LC_CODE_SIGNATURE:
126       O.CodeSignatureCommandIndex = O.LoadCommands.size();
127       break;
128     case MachO::LC_SEGMENT:
129       // LoadCmd.Ptr might not be aligned temporarily as
130       // MachO::segment_command requires, but the segname char pointer do not
131       // have alignment restrictions.
132       if (StringRef(reinterpret_cast<const char *>(
133               LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
134           TextSegmentName)
135         O.TextSegmentCommandIndex = O.LoadCommands.size();
136 
137       if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
138               extractSections<MachO::section, MachO::segment_command>(
139                   LoadCmd, MachOObj, NextSectionIndex))
140         LC.Sections = std::move(*Sections);
141       else
142         return Sections.takeError();
143       break;
144     case MachO::LC_SEGMENT_64:
145       // LoadCmd.Ptr might not be aligned temporarily as
146       // MachO::segment_command_64 requires, but the segname char pointer do
147       // not have alignment restrictions.
148       if (StringRef(reinterpret_cast<const char *>(
149               LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
150           TextSegmentName)
151         O.TextSegmentCommandIndex = O.LoadCommands.size();
152 
153       if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
154               extractSections<MachO::section_64, MachO::segment_command_64>(
155                   LoadCmd, MachOObj, NextSectionIndex))
156         LC.Sections = std::move(*Sections);
157       else
158         return Sections.takeError();
159       break;
160     case MachO::LC_SYMTAB:
161       O.SymTabCommandIndex = O.LoadCommands.size();
162       break;
163     case MachO::LC_DYSYMTAB:
164       O.DySymTabCommandIndex = O.LoadCommands.size();
165       break;
166     case MachO::LC_DYLD_INFO:
167     case MachO::LC_DYLD_INFO_ONLY:
168       O.DyLdInfoCommandIndex = O.LoadCommands.size();
169       break;
170     case MachO::LC_DATA_IN_CODE:
171       O.DataInCodeCommandIndex = O.LoadCommands.size();
172       break;
173     case MachO::LC_LINKER_OPTIMIZATION_HINT:
174       O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
175       break;
176     case MachO::LC_FUNCTION_STARTS:
177       O.FunctionStartsCommandIndex = O.LoadCommands.size();
178       break;
179     case MachO::LC_DYLIB_CODE_SIGN_DRS:
180       O.DylibCodeSignDRsIndex = O.LoadCommands.size();
181       break;
182     case MachO::LC_DYLD_EXPORTS_TRIE:
183       O.ExportsTrieCommandIndex = O.LoadCommands.size();
184       break;
185     case MachO::LC_DYLD_CHAINED_FIXUPS:
186       O.ChainedFixupsCommandIndex = O.LoadCommands.size();
187       break;
188     }
189 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
190   case MachO::LCName:                                                          \
191     memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
192            sizeof(MachO::LCStruct));                                           \
193     if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
194       MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
195     if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct))                           \
196       LC.Payload = ArrayRef<uint8_t>(                                          \
197           reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +       \
198               sizeof(MachO::LCStruct),                                         \
199           LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                        \
200     break;
201 
202     switch (LoadCmd.C.cmd) {
203     default:
204       memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
205              sizeof(MachO::load_command));
206       if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
207         MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
208       if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
209         LC.Payload = ArrayRef<uint8_t>(
210             reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
211                 sizeof(MachO::load_command),
212             LoadCmd.C.cmdsize - sizeof(MachO::load_command));
213       break;
214 #include "llvm/BinaryFormat/MachO.def"
215     }
216     O.LoadCommands.push_back(std::move(LC));
217   }
218   return Error::success();
219 }
220 
221 template <typename nlist_t>
constructSymbolEntry(StringRef StrTable,const nlist_t & nlist)222 SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
223   assert(nlist.n_strx < StrTable.size() &&
224          "n_strx exceeds the size of the string table");
225   SymbolEntry SE;
226   SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
227   SE.n_type = nlist.n_type;
228   SE.n_sect = nlist.n_sect;
229   SE.n_desc = nlist.n_desc;
230   SE.n_value = nlist.n_value;
231   return SE;
232 }
233 
readSymbolTable(Object & O) const234 void MachOReader::readSymbolTable(Object &O) const {
235   StringRef StrTable = MachOObj.getStringTableData();
236   for (auto Symbol : MachOObj.symbols()) {
237     SymbolEntry SE =
238         (MachOObj.is64Bit()
239              ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
240                                                   Symbol.getRawDataRefImpl()))
241              : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
242                                                   Symbol.getRawDataRefImpl())));
243 
244     O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
245   }
246 }
247 
setSymbolInRelocationInfo(Object & O) const248 void MachOReader::setSymbolInRelocationInfo(Object &O) const {
249   std::vector<const Section *> Sections;
250   for (auto &LC : O.LoadCommands)
251     for (std::unique_ptr<Section> &Sec : LC.Sections)
252       Sections.push_back(Sec.get());
253 
254   for (LoadCommand &LC : O.LoadCommands)
255     for (std::unique_ptr<Section> &Sec : LC.Sections)
256       for (auto &Reloc : Sec->Relocations)
257         if (!Reloc.Scattered && !Reloc.IsAddend) {
258           const uint32_t SymbolNum =
259               Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());
260           if (Reloc.Extern) {
261             Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);
262           } else {
263             // FIXME: Refactor error handling in MachOReader and report an error
264             // if we encounter an invalid relocation.
265             assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
266                    "Invalid section index.");
267             Reloc.Sec = Sections[SymbolNum - 1];
268           }
269         }
270 }
271 
readRebaseInfo(Object & O) const272 void MachOReader::readRebaseInfo(Object &O) const {
273   O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
274 }
275 
readBindInfo(Object & O) const276 void MachOReader::readBindInfo(Object &O) const {
277   O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
278 }
279 
readWeakBindInfo(Object & O) const280 void MachOReader::readWeakBindInfo(Object &O) const {
281   O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
282 }
283 
readLazyBindInfo(Object & O) const284 void MachOReader::readLazyBindInfo(Object &O) const {
285   O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
286 }
287 
readExportInfo(Object & O) const288 void MachOReader::readExportInfo(Object &O) const {
289   // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE
290   ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie();
291   if (Trie.empty())
292     Trie = MachOObj.getDyldExportsTrie();
293   O.Exports.Trie = Trie;
294 }
295 
readLinkData(Object & O,std::optional<size_t> LCIndex,LinkData & LD) const296 void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex,
297                                LinkData &LD) const {
298   if (!LCIndex)
299     return;
300   const MachO::linkedit_data_command &LC =
301       O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
302   LD.Data =
303       arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
304 }
305 
readDataInCodeData(Object & O) const306 void MachOReader::readDataInCodeData(Object &O) const {
307   return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
308 }
309 
readLinkerOptimizationHint(Object & O) const310 void MachOReader::readLinkerOptimizationHint(Object &O) const {
311   return readLinkData(O, O.LinkerOptimizationHintCommandIndex,
312                       O.LinkerOptimizationHint);
313 }
314 
readFunctionStartsData(Object & O) const315 void MachOReader::readFunctionStartsData(Object &O) const {
316   return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
317 }
318 
readDylibCodeSignDRs(Object & O) const319 void MachOReader::readDylibCodeSignDRs(Object &O) const {
320   return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs);
321 }
322 
readExportsTrie(Object & O) const323 void MachOReader::readExportsTrie(Object &O) const {
324   return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
325 }
326 
readChainedFixups(Object & O) const327 void MachOReader::readChainedFixups(Object &O) const {
328   return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
329 }
330 
readIndirectSymbolTable(Object & O) const331 void MachOReader::readIndirectSymbolTable(Object &O) const {
332   MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
333   constexpr uint32_t AbsOrLocalMask =
334       MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
335   for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
336     uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
337     if ((Index & AbsOrLocalMask) != 0)
338       O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt);
339     else
340       O.IndirectSymTable.Symbols.emplace_back(
341           Index, O.SymTable.getSymbolByIndex(Index));
342   }
343 }
344 
readSwiftVersion(Object & O) const345 void MachOReader::readSwiftVersion(Object &O) const {
346   struct ObjCImageInfo {
347     uint32_t Version;
348     uint32_t Flags;
349   } ImageInfo;
350 
351   for (const LoadCommand &LC : O.LoadCommands)
352     for (const std::unique_ptr<Section> &Sec : LC.Sections)
353       if (Sec->Sectname == "__objc_imageinfo" &&
354           (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
355            Sec->Segname == "__DATA_DIRTY") &&
356           Sec->Content.size() >= sizeof(ObjCImageInfo)) {
357         memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));
358         if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
359           sys::swapByteOrder(ImageInfo.Version);
360           sys::swapByteOrder(ImageInfo.Flags);
361         }
362         O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
363         return;
364       }
365 }
366 
create() const367 Expected<std::unique_ptr<Object>> MachOReader::create() const {
368   auto Obj = std::make_unique<Object>();
369   readHeader(*Obj);
370   if (Error E = readLoadCommands(*Obj))
371     return std::move(E);
372   readSymbolTable(*Obj);
373   setSymbolInRelocationInfo(*Obj);
374   readRebaseInfo(*Obj);
375   readBindInfo(*Obj);
376   readWeakBindInfo(*Obj);
377   readLazyBindInfo(*Obj);
378   readExportInfo(*Obj);
379   readDataInCodeData(*Obj);
380   readLinkerOptimizationHint(*Obj);
381   readFunctionStartsData(*Obj);
382   readDylibCodeSignDRs(*Obj);
383   readExportsTrie(*Obj);
384   readChainedFixups(*Obj);
385   readIndirectSymbolTable(*Obj);
386   readSwiftVersion(*Obj);
387   return std::move(Obj);
388 }
389