1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOReader.h"
10 #include "Object.h"
11 #include "llvm/BinaryFormat/MachO.h"
12 #include "llvm/Object/MachO.h"
13 #include "llvm/Support/Errc.h"
14 #include <memory>
15 
16 namespace llvm {
17 namespace objcopy {
18 namespace macho {
19 
readHeader(Object & O) const20 void MachOReader::readHeader(Object &O) const {
21   O.Header.Magic = MachOObj.getHeader().magic;
22   O.Header.CPUType = MachOObj.getHeader().cputype;
23   O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
24   O.Header.FileType = MachOObj.getHeader().filetype;
25   O.Header.NCmds = MachOObj.getHeader().ncmds;
26   O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
27   O.Header.Flags = MachOObj.getHeader().flags;
28 }
29 
30 template <typename SectionType>
constructSectionCommon(SectionType Sec,uint32_t Index)31 Section constructSectionCommon(SectionType Sec, uint32_t Index) {
32   StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
33   StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
34   Section S(SegName, SectName);
35   S.Index = Index;
36   S.Addr = Sec.addr;
37   S.Size = Sec.size;
38   S.OriginalOffset = Sec.offset;
39   S.Align = Sec.align;
40   S.RelOff = Sec.reloff;
41   S.NReloc = Sec.nreloc;
42   S.Flags = Sec.flags;
43   S.Reserved1 = Sec.reserved1;
44   S.Reserved2 = Sec.reserved2;
45   S.Reserved3 = 0;
46   return S;
47 }
48 
49 template <typename SectionType>
50 Section constructSection(SectionType Sec, uint32_t Index);
51 
constructSection(MachO::section Sec,uint32_t Index)52 template <> Section constructSection(MachO::section Sec, uint32_t Index) {
53   return constructSectionCommon(Sec, Index);
54 }
55 
constructSection(MachO::section_64 Sec,uint32_t Index)56 template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) {
57   Section S = constructSectionCommon(Sec, Index);
58   S.Reserved3 = Sec.reserved3;
59   return S;
60 }
61 
62 template <typename SectionType, typename SegmentType>
63 Expected<std::vector<std::unique_ptr<Section>>>
extractSections(const object::MachOObjectFile::LoadCommandInfo & LoadCmd,const object::MachOObjectFile & MachOObj,uint32_t & NextSectionIndex)64 extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
65                 const object::MachOObjectFile &MachOObj,
66                 uint32_t &NextSectionIndex) {
67   auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
68   const SectionType *Curr =
69       reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
70   std::vector<std::unique_ptr<Section>> Sections;
71   for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
72     if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
73       SectionType Sec;
74       memcpy((void *)&Sec, Curr, sizeof(SectionType));
75       MachO::swapStruct(Sec);
76       Sections.push_back(
77           std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
78     } else {
79       Sections.push_back(
80           std::make_unique<Section>(constructSection(*Curr, NextSectionIndex)));
81     }
82 
83     Section &S = *Sections.back();
84 
85     Expected<object::SectionRef> SecRef =
86         MachOObj.getSection(NextSectionIndex++);
87     if (!SecRef)
88       return SecRef.takeError();
89 
90     Expected<ArrayRef<uint8_t>> Data =
91         MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
92     if (!Data)
93       return Data.takeError();
94 
95     S.Content =
96         StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
97 
98     S.Relocations.reserve(S.NReloc);
99     for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
100               RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
101          RI != RE; ++RI) {
102       RelocationInfo R;
103       R.Symbol = nullptr; // We'll fill this field later.
104       R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
105       R.Scattered = MachOObj.isRelocationScattered(R.Info);
106       R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);
107       S.Relocations.push_back(R);
108     }
109 
110     assert(S.NReloc == S.Relocations.size() &&
111            "Incorrect number of relocations");
112   }
113   return std::move(Sections);
114 }
115 
readLoadCommands(Object & O) const116 Error MachOReader::readLoadCommands(Object &O) const {
117   // For MachO sections indices start from 1.
118   uint32_t NextSectionIndex = 1;
119   for (auto LoadCmd : MachOObj.load_commands()) {
120     LoadCommand LC;
121     switch (LoadCmd.C.cmd) {
122     case MachO::LC_CODE_SIGNATURE:
123       O.CodeSignatureCommandIndex = O.LoadCommands.size();
124       break;
125     case MachO::LC_SEGMENT:
126       if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
127               extractSections<MachO::section, MachO::segment_command>(
128                   LoadCmd, MachOObj, NextSectionIndex))
129         LC.Sections = std::move(*Sections);
130       else
131         return Sections.takeError();
132       break;
133     case MachO::LC_SEGMENT_64:
134       if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
135               extractSections<MachO::section_64, MachO::segment_command_64>(
136                   LoadCmd, MachOObj, NextSectionIndex))
137         LC.Sections = std::move(*Sections);
138       else
139         return Sections.takeError();
140       break;
141     case MachO::LC_SYMTAB:
142       O.SymTabCommandIndex = O.LoadCommands.size();
143       break;
144     case MachO::LC_DYSYMTAB:
145       O.DySymTabCommandIndex = O.LoadCommands.size();
146       break;
147     case MachO::LC_DYLD_INFO:
148     case MachO::LC_DYLD_INFO_ONLY:
149       O.DyLdInfoCommandIndex = O.LoadCommands.size();
150       break;
151     case MachO::LC_DATA_IN_CODE:
152       O.DataInCodeCommandIndex = O.LoadCommands.size();
153       break;
154     case MachO::LC_FUNCTION_STARTS:
155       O.FunctionStartsCommandIndex = O.LoadCommands.size();
156       break;
157     }
158 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
159   case MachO::LCName:                                                          \
160     memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
161            sizeof(MachO::LCStruct));                                           \
162     if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
163       MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
164     if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct))                           \
165       LC.Payload = ArrayRef<uint8_t>(                                          \
166           reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +       \
167               sizeof(MachO::LCStruct),                                         \
168           LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                        \
169     break;
170 
171     switch (LoadCmd.C.cmd) {
172     default:
173       memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
174              sizeof(MachO::load_command));
175       if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
176         MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
177       if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
178         LC.Payload = ArrayRef<uint8_t>(
179             reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
180                 sizeof(MachO::load_command),
181             LoadCmd.C.cmdsize - sizeof(MachO::load_command));
182       break;
183 #include "llvm/BinaryFormat/MachO.def"
184     }
185     O.LoadCommands.push_back(std::move(LC));
186   }
187   return Error::success();
188 }
189 
190 template <typename nlist_t>
constructSymbolEntry(StringRef StrTable,const nlist_t & nlist)191 SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
192   assert(nlist.n_strx < StrTable.size() &&
193          "n_strx exceeds the size of the string table");
194   SymbolEntry SE;
195   SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
196   SE.n_type = nlist.n_type;
197   SE.n_sect = nlist.n_sect;
198   SE.n_desc = nlist.n_desc;
199   SE.n_value = nlist.n_value;
200   return SE;
201 }
202 
readSymbolTable(Object & O) const203 void MachOReader::readSymbolTable(Object &O) const {
204   StringRef StrTable = MachOObj.getStringTableData();
205   for (auto Symbol : MachOObj.symbols()) {
206     SymbolEntry SE =
207         (MachOObj.is64Bit()
208              ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
209                                                   Symbol.getRawDataRefImpl()))
210              : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
211                                                   Symbol.getRawDataRefImpl())));
212 
213     O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
214   }
215 }
216 
setSymbolInRelocationInfo(Object & O) const217 void MachOReader::setSymbolInRelocationInfo(Object &O) const {
218   std::vector<const Section *> Sections;
219   for (auto &LC : O.LoadCommands)
220     for (std::unique_ptr<Section> &Sec : LC.Sections)
221       Sections.push_back(Sec.get());
222 
223   for (LoadCommand &LC : O.LoadCommands)
224     for (std::unique_ptr<Section> &Sec : LC.Sections)
225       for (auto &Reloc : Sec->Relocations)
226         if (!Reloc.Scattered) {
227           const uint32_t SymbolNum =
228               Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());
229           if (Reloc.Extern) {
230             Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);
231           } else {
232             // FIXME: Refactor error handling in MachOReader and report an error
233             // if we encounter an invalid relocation.
234             assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
235                    "Invalid section index.");
236             Reloc.Sec = Sections[SymbolNum - 1];
237           }
238         }
239 }
240 
readRebaseInfo(Object & O) const241 void MachOReader::readRebaseInfo(Object &O) const {
242   O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
243 }
244 
readBindInfo(Object & O) const245 void MachOReader::readBindInfo(Object &O) const {
246   O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
247 }
248 
readWeakBindInfo(Object & O) const249 void MachOReader::readWeakBindInfo(Object &O) const {
250   O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
251 }
252 
readLazyBindInfo(Object & O) const253 void MachOReader::readLazyBindInfo(Object &O) const {
254   O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
255 }
256 
readExportInfo(Object & O) const257 void MachOReader::readExportInfo(Object &O) const {
258   O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
259 }
260 
readLinkData(Object & O,Optional<size_t> LCIndex,LinkData & LD) const261 void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex,
262                                LinkData &LD) const {
263   if (!LCIndex)
264     return;
265   const MachO::linkedit_data_command &LC =
266       O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
267   LD.Data =
268       arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
269 }
270 
readCodeSignature(Object & O) const271 void MachOReader::readCodeSignature(Object &O) const {
272   return readLinkData(O, O.CodeSignatureCommandIndex, O.CodeSignature);
273 }
274 
readDataInCodeData(Object & O) const275 void MachOReader::readDataInCodeData(Object &O) const {
276   return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
277 }
278 
readFunctionStartsData(Object & O) const279 void MachOReader::readFunctionStartsData(Object &O) const {
280   return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
281 }
282 
readIndirectSymbolTable(Object & O) const283 void MachOReader::readIndirectSymbolTable(Object &O) const {
284   MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
285   constexpr uint32_t AbsOrLocalMask =
286       MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
287   for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
288     uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
289     if ((Index & AbsOrLocalMask) != 0)
290       O.IndirectSymTable.Symbols.emplace_back(Index, None);
291     else
292       O.IndirectSymTable.Symbols.emplace_back(
293           Index, O.SymTable.getSymbolByIndex(Index));
294   }
295 }
296 
readSwiftVersion(Object & O) const297 void MachOReader::readSwiftVersion(Object &O) const {
298   struct ObjCImageInfo {
299     uint32_t Version;
300     uint32_t Flags;
301   } ImageInfo;
302 
303   for (const LoadCommand &LC : O.LoadCommands)
304     for (const std::unique_ptr<Section> &Sec : LC.Sections)
305       if (Sec->Sectname == "__objc_imageinfo" &&
306           (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
307            Sec->Segname == "__DATA_DIRTY") &&
308           Sec->Content.size() >= sizeof(ObjCImageInfo)) {
309         memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));
310         if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
311           sys::swapByteOrder(ImageInfo.Version);
312           sys::swapByteOrder(ImageInfo.Flags);
313         }
314         O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
315         return;
316       }
317 }
318 
create() const319 Expected<std::unique_ptr<Object>> MachOReader::create() const {
320   auto Obj = std::make_unique<Object>();
321   readHeader(*Obj);
322   if (Error E = readLoadCommands(*Obj))
323     return std::move(E);
324   readSymbolTable(*Obj);
325   setSymbolInRelocationInfo(*Obj);
326   readRebaseInfo(*Obj);
327   readBindInfo(*Obj);
328   readWeakBindInfo(*Obj);
329   readLazyBindInfo(*Obj);
330   readExportInfo(*Obj);
331   readCodeSignature(*Obj);
332   readDataInCodeData(*Obj);
333   readFunctionStartsData(*Obj);
334   readIndirectSymbolTable(*Obj);
335   readSwiftVersion(*Obj);
336   return std::move(Obj);
337 }
338 
339 } // end namespace macho
340 } // end namespace objcopy
341 } // end namespace llvm
342