1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOObject.h"
10 #include "llvm/ADT/SmallPtrSet.h"
11 #include <unordered_set>
12 
13 using namespace llvm;
14 using namespace llvm::objcopy::macho;
15 
16 Section::Section(StringRef SegName, StringRef SectName)
17     : Segname(SegName), Sectname(SectName),
18       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
19 
20 Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
21     : Segname(SegName), Sectname(SectName),
22       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
23       Content(Content) {}
24 
25 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
26   assert(Index < Symbols.size() && "invalid symbol index");
27   return Symbols[Index].get();
28 }
29 
30 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
31   return const_cast<SymbolEntry *>(
32       static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
33 }
34 
35 void SymbolTable::removeSymbols(
36     function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
37   llvm::erase_if(Symbols, ToRemove);
38 }
39 
40 void Object::updateLoadCommandIndexes() {
41   static constexpr char TextSegmentName[] = "__TEXT";
42   // Update indices of special load commands
43   for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
44     LoadCommand &LC = LoadCommands[Index];
45     switch (LC.MachOLoadCommand.load_command_data.cmd) {
46     case MachO::LC_CODE_SIGNATURE:
47       CodeSignatureCommandIndex = Index;
48       break;
49     case MachO::LC_SEGMENT:
50       if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
51           TextSegmentName)
52         TextSegmentCommandIndex = Index;
53       break;
54     case MachO::LC_SEGMENT_64:
55       if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
56           TextSegmentName)
57         TextSegmentCommandIndex = Index;
58       break;
59     case MachO::LC_SYMTAB:
60       SymTabCommandIndex = Index;
61       break;
62     case MachO::LC_DYSYMTAB:
63       DySymTabCommandIndex = Index;
64       break;
65     case MachO::LC_DYLD_INFO:
66     case MachO::LC_DYLD_INFO_ONLY:
67       DyLdInfoCommandIndex = Index;
68       break;
69     case MachO::LC_DATA_IN_CODE:
70       DataInCodeCommandIndex = Index;
71       break;
72     case MachO::LC_LINKER_OPTIMIZATION_HINT:
73       LinkerOptimizationHintCommandIndex = Index;
74       break;
75     case MachO::LC_FUNCTION_STARTS:
76       FunctionStartsCommandIndex = Index;
77       break;
78     case MachO::LC_DYLIB_CODE_SIGN_DRS:
79       DylibCodeSignDRsIndex = Index;
80       break;
81     case MachO::LC_DYLD_CHAINED_FIXUPS:
82       ChainedFixupsCommandIndex = Index;
83       break;
84     case MachO::LC_DYLD_EXPORTS_TRIE:
85       ExportsTrieCommandIndex = Index;
86       break;
87     }
88   }
89 }
90 
91 Error Object::removeLoadCommands(
92     function_ref<bool(const LoadCommand &)> ToRemove) {
93   auto It = std::stable_partition(
94       LoadCommands.begin(), LoadCommands.end(),
95       [&](const LoadCommand &LC) { return !ToRemove(LC); });
96   LoadCommands.erase(It, LoadCommands.end());
97 
98   updateLoadCommandIndexes();
99   return Error::success();
100 }
101 
102 Error Object::removeSections(
103     function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
104   DenseMap<uint32_t, const Section *> OldIndexToSection;
105   uint32_t NextSectionIndex = 1;
106   for (LoadCommand &LC : LoadCommands) {
107     auto It = std::stable_partition(
108         std::begin(LC.Sections), std::end(LC.Sections),
109         [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
110     for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
111       OldIndexToSection[(*I)->Index] = I->get();
112       (*I)->Index = NextSectionIndex++;
113     }
114     LC.Sections.erase(It, LC.Sections.end());
115   }
116 
117   auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
118     std::optional<uint32_t> Section = S->section();
119     return (Section && !OldIndexToSection.count(*Section));
120   };
121 
122   SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
123   for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
124     if (IsDead(Sym))
125       DeadSymbols.insert(Sym.get());
126 
127   for (const LoadCommand &LC : LoadCommands)
128     for (const std::unique_ptr<Section> &Sec : LC.Sections)
129       for (const RelocationInfo &R : Sec->Relocations)
130         if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
131           return createStringError(std::errc::invalid_argument,
132                                    "symbol '%s' defined in section with index "
133                                    "'%u' cannot be removed because it is "
134                                    "referenced by a relocation in section '%s'",
135                                    (*R.Symbol)->Name.c_str(),
136                                    *((*R.Symbol)->section()),
137                                    Sec->CanonicalName.c_str());
138   SymTable.removeSymbols(IsDead);
139   for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
140     if (S->section())
141       S->n_sect = OldIndexToSection[S->n_sect]->Index;
142   return Error::success();
143 }
144 
145 uint64_t Object::nextAvailableSegmentAddress() const {
146   uint64_t HeaderSize =
147       is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
148   uint64_t Addr = HeaderSize + Header.SizeOfCmds;
149   for (const LoadCommand &LC : LoadCommands) {
150     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
151     switch (MLC.load_command_data.cmd) {
152     case MachO::LC_SEGMENT:
153       Addr = std::max(Addr,
154                       static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
155                           MLC.segment_command_data.vmsize);
156       break;
157     case MachO::LC_SEGMENT_64:
158       Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
159                                 MLC.segment_command_64_data.vmsize);
160       break;
161     default:
162       continue;
163     }
164   }
165   return Addr;
166 }
167 
168 template <typename SegmentType>
169 static void
170 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
171                  StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
172   assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
173   memset(&Seg, 0, sizeof(SegmentType));
174   Seg.cmd = CmdType;
175   strncpy(Seg.segname, SegName.data(), SegName.size());
176   Seg.maxprot |=
177       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
178   Seg.initprot |=
179       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
180   Seg.vmaddr = SegVMAddr;
181   Seg.vmsize = SegVMSize;
182 }
183 
184 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
185   LoadCommand LC;
186   const uint64_t SegVMAddr = nextAvailableSegmentAddress();
187   if (is64Bit())
188     constructSegment(LC.MachOLoadCommand.segment_command_64_data,
189                      MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
190   else
191     constructSegment(LC.MachOLoadCommand.segment_command_data,
192                      MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
193 
194   LoadCommands.push_back(std::move(LC));
195   return LoadCommands.back();
196 }
197 
198 /// Extracts a segment name from a string which is possibly non-null-terminated.
199 static StringRef extractSegmentName(const char *SegName) {
200   return StringRef(SegName,
201                    strnlen(SegName, sizeof(MachO::segment_command::segname)));
202 }
203 
204 std::optional<StringRef> LoadCommand::getSegmentName() const {
205   const MachO::macho_load_command &MLC = MachOLoadCommand;
206   switch (MLC.load_command_data.cmd) {
207   case MachO::LC_SEGMENT:
208     return extractSegmentName(MLC.segment_command_data.segname);
209   case MachO::LC_SEGMENT_64:
210     return extractSegmentName(MLC.segment_command_64_data.segname);
211   default:
212     return std::nullopt;
213   }
214 }
215 
216 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
217   const MachO::macho_load_command &MLC = MachOLoadCommand;
218   switch (MLC.load_command_data.cmd) {
219   case MachO::LC_SEGMENT:
220     return MLC.segment_command_data.vmaddr;
221   case MachO::LC_SEGMENT_64:
222     return MLC.segment_command_64_data.vmaddr;
223   default:
224     return std::nullopt;
225   }
226 }
227