1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOObject.h"
10 #include "llvm/ADT/SmallPtrSet.h"
11 #include <unordered_set>
12 
13 using namespace llvm;
14 using namespace llvm::objcopy::macho;
15 
16 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
17   assert(Index < Symbols.size() && "invalid symbol index");
18   return Symbols[Index].get();
19 }
20 
21 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
22   return const_cast<SymbolEntry *>(
23       static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
24 }
25 
26 void SymbolTable::removeSymbols(
27     function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
28   llvm::erase_if(Symbols, ToRemove);
29 }
30 
31 void Object::updateLoadCommandIndexes() {
32   static constexpr char TextSegmentName[] = "__TEXT";
33   // Update indices of special load commands
34   for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
35     LoadCommand &LC = LoadCommands[Index];
36     switch (LC.MachOLoadCommand.load_command_data.cmd) {
37     case MachO::LC_CODE_SIGNATURE:
38       CodeSignatureCommandIndex = Index;
39       break;
40     case MachO::LC_SEGMENT:
41       if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
42           TextSegmentName)
43         TextSegmentCommandIndex = Index;
44       break;
45     case MachO::LC_SEGMENT_64:
46       if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
47           TextSegmentName)
48         TextSegmentCommandIndex = Index;
49       break;
50     case MachO::LC_SYMTAB:
51       SymTabCommandIndex = Index;
52       break;
53     case MachO::LC_DYSYMTAB:
54       DySymTabCommandIndex = Index;
55       break;
56     case MachO::LC_DYLD_INFO:
57     case MachO::LC_DYLD_INFO_ONLY:
58       DyLdInfoCommandIndex = Index;
59       break;
60     case MachO::LC_DATA_IN_CODE:
61       DataInCodeCommandIndex = Index;
62       break;
63     case MachO::LC_LINKER_OPTIMIZATION_HINT:
64       LinkerOptimizationHintCommandIndex = Index;
65       break;
66     case MachO::LC_FUNCTION_STARTS:
67       FunctionStartsCommandIndex = Index;
68       break;
69     case MachO::LC_DYLD_CHAINED_FIXUPS:
70       ChainedFixupsCommandIndex = Index;
71       break;
72     case MachO::LC_DYLD_EXPORTS_TRIE:
73       ExportsTrieCommandIndex = Index;
74       break;
75     }
76   }
77 }
78 
79 Error Object::removeLoadCommands(
80     function_ref<bool(const LoadCommand &)> ToRemove) {
81   auto It = std::stable_partition(
82       LoadCommands.begin(), LoadCommands.end(),
83       [&](const LoadCommand &LC) { return !ToRemove(LC); });
84   LoadCommands.erase(It, LoadCommands.end());
85 
86   updateLoadCommandIndexes();
87   return Error::success();
88 }
89 
90 Error Object::removeSections(
91     function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
92   DenseMap<uint32_t, const Section *> OldIndexToSection;
93   uint32_t NextSectionIndex = 1;
94   for (LoadCommand &LC : LoadCommands) {
95     auto It = std::stable_partition(
96         std::begin(LC.Sections), std::end(LC.Sections),
97         [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
98     for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
99       OldIndexToSection[(*I)->Index] = I->get();
100       (*I)->Index = NextSectionIndex++;
101     }
102     LC.Sections.erase(It, LC.Sections.end());
103   }
104 
105   auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
106     Optional<uint32_t> Section = S->section();
107     return (Section && !OldIndexToSection.count(*Section));
108   };
109 
110   SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
111   for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
112     if (IsDead(Sym))
113       DeadSymbols.insert(Sym.get());
114 
115   for (const LoadCommand &LC : LoadCommands)
116     for (const std::unique_ptr<Section> &Sec : LC.Sections)
117       for (const RelocationInfo &R : Sec->Relocations)
118         if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
119           return createStringError(std::errc::invalid_argument,
120                                    "symbol '%s' defined in section with index "
121                                    "'%u' cannot be removed because it is "
122                                    "referenced by a relocation in section '%s'",
123                                    (*R.Symbol)->Name.c_str(),
124                                    *((*R.Symbol)->section()),
125                                    Sec->CanonicalName.c_str());
126   SymTable.removeSymbols(IsDead);
127   for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
128     if (S->section())
129       S->n_sect = OldIndexToSection[S->n_sect]->Index;
130   return Error::success();
131 }
132 
133 uint64_t Object::nextAvailableSegmentAddress() const {
134   uint64_t HeaderSize =
135       is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
136   uint64_t Addr = HeaderSize + Header.SizeOfCmds;
137   for (const LoadCommand &LC : LoadCommands) {
138     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
139     switch (MLC.load_command_data.cmd) {
140     case MachO::LC_SEGMENT:
141       Addr = std::max(Addr,
142                       static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
143                           MLC.segment_command_data.vmsize);
144       break;
145     case MachO::LC_SEGMENT_64:
146       Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
147                                 MLC.segment_command_64_data.vmsize);
148       break;
149     default:
150       continue;
151     }
152   }
153   return Addr;
154 }
155 
156 template <typename SegmentType>
157 static void
158 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
159                  StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
160   assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
161   memset(&Seg, 0, sizeof(SegmentType));
162   Seg.cmd = CmdType;
163   strncpy(Seg.segname, SegName.data(), SegName.size());
164   Seg.maxprot |=
165       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
166   Seg.initprot |=
167       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
168   Seg.vmaddr = SegVMAddr;
169   Seg.vmsize = SegVMSize;
170 }
171 
172 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
173   LoadCommand LC;
174   const uint64_t SegVMAddr = nextAvailableSegmentAddress();
175   if (is64Bit())
176     constructSegment(LC.MachOLoadCommand.segment_command_64_data,
177                      MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
178   else
179     constructSegment(LC.MachOLoadCommand.segment_command_data,
180                      MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
181 
182   LoadCommands.push_back(std::move(LC));
183   return LoadCommands.back();
184 }
185 
186 /// Extracts a segment name from a string which is possibly non-null-terminated.
187 static StringRef extractSegmentName(const char *SegName) {
188   return StringRef(SegName,
189                    strnlen(SegName, sizeof(MachO::segment_command::segname)));
190 }
191 
192 Optional<StringRef> LoadCommand::getSegmentName() const {
193   const MachO::macho_load_command &MLC = MachOLoadCommand;
194   switch (MLC.load_command_data.cmd) {
195   case MachO::LC_SEGMENT:
196     return extractSegmentName(MLC.segment_command_data.segname);
197   case MachO::LC_SEGMENT_64:
198     return extractSegmentName(MLC.segment_command_64_data.segname);
199   default:
200     return None;
201   }
202 }
203 
204 Optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
205   const MachO::macho_load_command &MLC = MachOLoadCommand;
206   switch (MLC.load_command_data.cmd) {
207   case MachO::LC_SEGMENT:
208     return MLC.segment_command_data.vmaddr;
209   case MachO::LC_SEGMENT_64:
210     return MLC.segment_command_64_data.vmaddr;
211   default:
212     return None;
213   }
214 }
215