1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOObject.h"
10 #include "llvm/ADT/SmallPtrSet.h"
11 #include "llvm/Support/SystemZ/zOSSupport.h"
12 #include <unordered_set>
13 
14 using namespace llvm;
15 using namespace llvm::objcopy::macho;
16 
Section(StringRef SegName,StringRef SectName)17 Section::Section(StringRef SegName, StringRef SectName)
18     : Segname(SegName), Sectname(SectName),
19       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
20 
Section(StringRef SegName,StringRef SectName,StringRef Content)21 Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
22     : Segname(SegName), Sectname(SectName),
23       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
24       Content(Content) {}
25 
getSymbolByIndex(uint32_t Index) const26 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
27   assert(Index < Symbols.size() && "invalid symbol index");
28   return Symbols[Index].get();
29 }
30 
getSymbolByIndex(uint32_t Index)31 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
32   return const_cast<SymbolEntry *>(
33       static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
34 }
35 
removeSymbols(function_ref<bool (const std::unique_ptr<SymbolEntry> &)> ToRemove)36 void SymbolTable::removeSymbols(
37     function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
38   llvm::erase_if(Symbols, ToRemove);
39 }
40 
updateLoadCommandIndexes()41 void Object::updateLoadCommandIndexes() {
42   static constexpr char TextSegmentName[] = "__TEXT";
43   // Update indices of special load commands
44   for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
45     LoadCommand &LC = LoadCommands[Index];
46     switch (LC.MachOLoadCommand.load_command_data.cmd) {
47     case MachO::LC_CODE_SIGNATURE:
48       CodeSignatureCommandIndex = Index;
49       break;
50     case MachO::LC_SEGMENT:
51       if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
52           TextSegmentName)
53         TextSegmentCommandIndex = Index;
54       break;
55     case MachO::LC_SEGMENT_64:
56       if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
57           TextSegmentName)
58         TextSegmentCommandIndex = Index;
59       break;
60     case MachO::LC_SYMTAB:
61       SymTabCommandIndex = Index;
62       break;
63     case MachO::LC_DYSYMTAB:
64       DySymTabCommandIndex = Index;
65       break;
66     case MachO::LC_DYLD_INFO:
67     case MachO::LC_DYLD_INFO_ONLY:
68       DyLdInfoCommandIndex = Index;
69       break;
70     case MachO::LC_DATA_IN_CODE:
71       DataInCodeCommandIndex = Index;
72       break;
73     case MachO::LC_LINKER_OPTIMIZATION_HINT:
74       LinkerOptimizationHintCommandIndex = Index;
75       break;
76     case MachO::LC_FUNCTION_STARTS:
77       FunctionStartsCommandIndex = Index;
78       break;
79     case MachO::LC_DYLIB_CODE_SIGN_DRS:
80       DylibCodeSignDRsIndex = Index;
81       break;
82     case MachO::LC_DYLD_CHAINED_FIXUPS:
83       ChainedFixupsCommandIndex = Index;
84       break;
85     case MachO::LC_DYLD_EXPORTS_TRIE:
86       ExportsTrieCommandIndex = Index;
87       break;
88     }
89   }
90 }
91 
removeLoadCommands(function_ref<bool (const LoadCommand &)> ToRemove)92 Error Object::removeLoadCommands(
93     function_ref<bool(const LoadCommand &)> ToRemove) {
94   auto It = std::stable_partition(
95       LoadCommands.begin(), LoadCommands.end(),
96       [&](const LoadCommand &LC) { return !ToRemove(LC); });
97   LoadCommands.erase(It, LoadCommands.end());
98 
99   updateLoadCommandIndexes();
100   return Error::success();
101 }
102 
removeSections(function_ref<bool (const std::unique_ptr<Section> &)> ToRemove)103 Error Object::removeSections(
104     function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
105   DenseMap<uint32_t, const Section *> OldIndexToSection;
106   uint32_t NextSectionIndex = 1;
107   for (LoadCommand &LC : LoadCommands) {
108     auto It = std::stable_partition(
109         std::begin(LC.Sections), std::end(LC.Sections),
110         [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
111     for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
112       OldIndexToSection[(*I)->Index] = I->get();
113       (*I)->Index = NextSectionIndex++;
114     }
115     LC.Sections.erase(It, LC.Sections.end());
116   }
117 
118   auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
119     std::optional<uint32_t> Section = S->section();
120     return (Section && !OldIndexToSection.count(*Section));
121   };
122 
123   SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
124   for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
125     if (IsDead(Sym))
126       DeadSymbols.insert(Sym.get());
127 
128   for (const LoadCommand &LC : LoadCommands)
129     for (const std::unique_ptr<Section> &Sec : LC.Sections)
130       for (const RelocationInfo &R : Sec->Relocations)
131         if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
132           return createStringError(std::errc::invalid_argument,
133                                    "symbol '%s' defined in section with index "
134                                    "'%u' cannot be removed because it is "
135                                    "referenced by a relocation in section '%s'",
136                                    (*R.Symbol)->Name.c_str(),
137                                    *((*R.Symbol)->section()),
138                                    Sec->CanonicalName.c_str());
139   SymTable.removeSymbols(IsDead);
140   for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
141     if (S->section())
142       S->n_sect = OldIndexToSection[S->n_sect]->Index;
143   return Error::success();
144 }
145 
nextAvailableSegmentAddress() const146 uint64_t Object::nextAvailableSegmentAddress() const {
147   uint64_t HeaderSize =
148       is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
149   uint64_t Addr = HeaderSize + Header.SizeOfCmds;
150   for (const LoadCommand &LC : LoadCommands) {
151     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
152     switch (MLC.load_command_data.cmd) {
153     case MachO::LC_SEGMENT:
154       Addr = std::max(Addr,
155                       static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
156                           MLC.segment_command_data.vmsize);
157       break;
158     case MachO::LC_SEGMENT_64:
159       Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
160                                 MLC.segment_command_64_data.vmsize);
161       break;
162     default:
163       continue;
164     }
165   }
166   return Addr;
167 }
168 
169 template <typename SegmentType>
170 static void
constructSegment(SegmentType & Seg,llvm::MachO::LoadCommandType CmdType,StringRef SegName,uint64_t SegVMAddr,uint64_t SegVMSize)171 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
172                  StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
173   assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
174   memset(&Seg, 0, sizeof(SegmentType));
175   Seg.cmd = CmdType;
176   strncpy(Seg.segname, SegName.data(), SegName.size());
177   Seg.maxprot |=
178       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
179   Seg.initprot |=
180       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
181   Seg.vmaddr = SegVMAddr;
182   Seg.vmsize = SegVMSize;
183 }
184 
addSegment(StringRef SegName,uint64_t SegVMSize)185 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
186   LoadCommand LC;
187   const uint64_t SegVMAddr = nextAvailableSegmentAddress();
188   if (is64Bit())
189     constructSegment(LC.MachOLoadCommand.segment_command_64_data,
190                      MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
191   else
192     constructSegment(LC.MachOLoadCommand.segment_command_data,
193                      MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
194 
195   LoadCommands.push_back(std::move(LC));
196   return LoadCommands.back();
197 }
198 
199 /// Extracts a segment name from a string which is possibly non-null-terminated.
extractSegmentName(const char * SegName)200 static StringRef extractSegmentName(const char *SegName) {
201   return StringRef(SegName,
202                    strnlen(SegName, sizeof(MachO::segment_command::segname)));
203 }
204 
getSegmentName() const205 std::optional<StringRef> LoadCommand::getSegmentName() const {
206   const MachO::macho_load_command &MLC = MachOLoadCommand;
207   switch (MLC.load_command_data.cmd) {
208   case MachO::LC_SEGMENT:
209     return extractSegmentName(MLC.segment_command_data.segname);
210   case MachO::LC_SEGMENT_64:
211     return extractSegmentName(MLC.segment_command_64_data.segname);
212   default:
213     return std::nullopt;
214   }
215 }
216 
getSegmentVMAddr() const217 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
218   const MachO::macho_load_command &MLC = MachOLoadCommand;
219   switch (MLC.load_command_data.cmd) {
220   case MachO::LC_SEGMENT:
221     return MLC.segment_command_data.vmaddr;
222   case MachO::LC_SEGMENT_64:
223     return MLC.segment_command_64_data.vmaddr;
224   default:
225     return std::nullopt;
226   }
227 }
228