1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MachOObjcopy.h"
10 #include "../llvm-objcopy.h"
11 #include "CommonConfig.h"
12 #include "MachOReader.h"
13 #include "MachOWriter.h"
14 #include "MultiFormatConfig.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/Object/ArchiveWriter.h"
17 #include "llvm/Object/MachOUniversal.h"
18 #include "llvm/Object/MachOUniversalWriter.h"
19 #include "llvm/Support/Errc.h"
20 #include "llvm/Support/Error.h"
21 #include "llvm/Support/FileOutputBuffer.h"
22 #include "llvm/Support/SmallVectorMemoryBuffer.h"
23
24 using namespace llvm;
25 using namespace llvm::objcopy;
26 using namespace llvm::objcopy::macho;
27 using namespace llvm::object;
28
// Predicate over a section. removeSections() builds one and hands it to
// Object::removeSections(); the predicate returns true for sections that
// should be dropped.
using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
// Predicate over a load command. processLoadCommands() builds one and hands it
// to Object::removeLoadCommands(); the predicate returns true for load
// commands that should be dropped.
using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
31
32 #ifndef NDEBUG
isLoadCommandWithPayloadString(const LoadCommand & LC)33 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
34 // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
35 // LC_LAZY_LOAD_DYLIB
36 return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
37 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
38 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
39 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
40 }
41 #endif
42
getPayloadString(const LoadCommand & LC)43 static StringRef getPayloadString(const LoadCommand &LC) {
44 assert(isLoadCommandWithPayloadString(LC) &&
45 "unsupported load command encountered");
46
47 return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
48 LC.Payload.size())
49 .rtrim('\0');
50 }
51
removeSections(const CommonConfig & Config,Object & Obj)52 static Error removeSections(const CommonConfig &Config, Object &Obj) {
53 SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
54 return false;
55 };
56
57 if (!Config.ToRemove.empty()) {
58 RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
59 return Config.ToRemove.matches(Sec->CanonicalName);
60 };
61 }
62
63 if (Config.StripAll || Config.StripDebug) {
64 // Remove all debug sections.
65 RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
66 if (Sec->Segname == "__DWARF")
67 return true;
68
69 return RemovePred(Sec);
70 };
71 }
72
73 if (!Config.OnlySection.empty()) {
74 // Overwrite RemovePred because --only-section takes priority.
75 RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
76 return !Config.OnlySection.matches(Sec->CanonicalName);
77 };
78 }
79
80 return Obj.removeSections(RemovePred);
81 }
82
markSymbols(const CommonConfig &,Object & Obj)83 static void markSymbols(const CommonConfig &, Object &Obj) {
84 // Symbols referenced from the indirect symbol table must not be removed.
85 for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
86 if (ISE.Symbol)
87 (*ISE.Symbol)->Referenced = true;
88 }
89
updateAndRemoveSymbols(const CommonConfig & Config,Object & Obj)90 static void updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
91 for (SymbolEntry &Sym : Obj.SymTable) {
92 auto I = Config.SymbolsToRename.find(Sym.Name);
93 if (I != Config.SymbolsToRename.end())
94 Sym.Name = std::string(I->getValue());
95 }
96
97 auto RemovePred = [Config, &Obj](const std::unique_ptr<SymbolEntry> &N) {
98 if (N->Referenced)
99 return false;
100 if (Config.KeepUndefined && N->isUndefinedSymbol())
101 return false;
102 if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
103 return false;
104 if (Config.StripAll)
105 return true;
106 if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
107 return true;
108 // This behavior is consistent with cctools' strip.
109 if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) &&
110 Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol())
111 return true;
112 return false;
113 };
114
115 Obj.SymTable.removeSymbols(RemovePred);
116 }
117
118 template <typename LCType>
updateLoadCommandPayloadString(LoadCommand & LC,StringRef S)119 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
120 assert(isLoadCommandWithPayloadString(LC) &&
121 "unsupported load command encountered");
122
123 uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
124
125 LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
126 LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
127 std::copy(S.begin(), S.end(), LC.Payload.begin());
128 }
129
buildRPathLoadCommand(StringRef Path)130 static LoadCommand buildRPathLoadCommand(StringRef Path) {
131 LoadCommand LC;
132 MachO::rpath_command RPathLC;
133 RPathLC.cmd = MachO::LC_RPATH;
134 RPathLC.path = sizeof(MachO::rpath_command);
135 RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
136 LC.MachOLoadCommand.rpath_command_data = RPathLC;
137 LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
138 std::copy(Path.begin(), Path.end(), LC.Payload.begin());
139 return LC;
140 }
141
processLoadCommands(const CommonConfig & Config,Object & Obj)142 static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
143 // Remove RPaths.
144 DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(),
145 Config.RPathsToRemove.end());
146
147 LoadCommandPred RemovePred = [&RPathsToRemove,
148 &Config](const LoadCommand &LC) {
149 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
150 // When removing all RPaths we don't need to care
151 // about what it contains
152 if (Config.RemoveAllRpaths)
153 return true;
154
155 StringRef RPath = getPayloadString(LC);
156 if (RPathsToRemove.count(RPath)) {
157 RPathsToRemove.erase(RPath);
158 return true;
159 }
160 }
161 return false;
162 };
163
164 if (Error E = Obj.removeLoadCommands(RemovePred))
165 return E;
166
167 // Emit an error if the Mach-O binary does not contain an rpath path name
168 // specified in -delete_rpath.
169 for (StringRef RPath : Config.RPathsToRemove) {
170 if (RPathsToRemove.count(RPath))
171 return createStringError(errc::invalid_argument,
172 "no LC_RPATH load command with path: %s",
173 RPath.str().c_str());
174 }
175
176 DenseSet<StringRef> RPaths;
177
178 // Get all existing RPaths.
179 for (LoadCommand &LC : Obj.LoadCommands) {
180 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
181 RPaths.insert(getPayloadString(LC));
182 }
183
184 // Throw errors for invalid RPaths.
185 for (const auto &OldNew : Config.RPathsToUpdate) {
186 StringRef Old = OldNew.getFirst();
187 StringRef New = OldNew.getSecond();
188 if (!RPaths.contains(Old))
189 return createStringError(errc::invalid_argument,
190 "no LC_RPATH load command with path: " + Old);
191 if (RPaths.contains(New))
192 return createStringError(errc::invalid_argument,
193 "rpath '" + New +
194 "' would create a duplicate load command");
195 }
196
197 // Update load commands.
198 for (LoadCommand &LC : Obj.LoadCommands) {
199 switch (LC.MachOLoadCommand.load_command_data.cmd) {
200 case MachO::LC_ID_DYLIB:
201 if (Config.SharedLibId)
202 updateLoadCommandPayloadString<MachO::dylib_command>(
203 LC, *Config.SharedLibId);
204 break;
205
206 case MachO::LC_RPATH: {
207 StringRef RPath = getPayloadString(LC);
208 StringRef NewRPath = Config.RPathsToUpdate.lookup(RPath);
209 if (!NewRPath.empty())
210 updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
211 break;
212 }
213
214 // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
215 // here once llvm-objcopy supports them.
216 case MachO::LC_LOAD_DYLIB:
217 case MachO::LC_LOAD_WEAK_DYLIB:
218 StringRef InstallName = getPayloadString(LC);
219 StringRef NewInstallName =
220 Config.InstallNamesToUpdate.lookup(InstallName);
221 if (!NewInstallName.empty())
222 updateLoadCommandPayloadString<MachO::dylib_command>(LC,
223 NewInstallName);
224 break;
225 }
226 }
227
228 // Add new RPaths.
229 for (StringRef RPath : Config.RPathToAdd) {
230 if (RPaths.contains(RPath))
231 return createStringError(errc::invalid_argument,
232 "rpath '" + RPath +
233 "' would create a duplicate load command");
234 RPaths.insert(RPath);
235 Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
236 }
237
238 for (StringRef RPath : Config.RPathToPrepend) {
239 if (RPaths.contains(RPath))
240 return createStringError(errc::invalid_argument,
241 "rpath '" + RPath +
242 "' would create a duplicate load command");
243
244 RPaths.insert(RPath);
245 Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
246 buildRPathLoadCommand(RPath));
247 }
248
249 // Unlike appending rpaths, the indexes of subsequent load commands must
250 // be recalculated after prepending one.
251 if (!Config.RPathToPrepend.empty())
252 Obj.updateLoadCommandIndexes();
253
254 return Error::success();
255 }
256
dumpSectionToFile(StringRef SecName,StringRef Filename,Object & Obj)257 static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
258 Object &Obj) {
259 for (LoadCommand &LC : Obj.LoadCommands)
260 for (const std::unique_ptr<Section> &Sec : LC.Sections) {
261 if (Sec->CanonicalName == SecName) {
262 Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
263 FileOutputBuffer::create(Filename, Sec->Content.size());
264 if (!BufferOrErr)
265 return BufferOrErr.takeError();
266 std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
267 llvm::copy(Sec->Content, Buf->getBufferStart());
268
269 if (Error E = Buf->commit())
270 return E;
271 return Error::success();
272 }
273 }
274
275 return createStringError(object_error::parse_failed, "section '%s' not found",
276 SecName.str().c_str());
277 }
278
addSection(StringRef SecName,StringRef Filename,Object & Obj)279 static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) {
280 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
281 MemoryBuffer::getFile(Filename);
282 if (!BufOrErr)
283 return createFileError(Filename, errorCodeToError(BufOrErr.getError()));
284 std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
285
286 std::pair<StringRef, StringRef> Pair = SecName.split(',');
287 StringRef TargetSegName = Pair.first;
288 Section Sec(TargetSegName, Pair.second);
289 Sec.Content = Obj.NewSectionsContents.save(Buf->getBuffer());
290 Sec.Size = Sec.Content.size();
291
292 // Add the a section into an existing segment.
293 for (LoadCommand &LC : Obj.LoadCommands) {
294 Optional<StringRef> SegName = LC.getSegmentName();
295 if (SegName && SegName == TargetSegName) {
296 uint64_t Addr = *LC.getSegmentVMAddr();
297 for (const std::unique_ptr<Section> &S : LC.Sections)
298 Addr = std::max(Addr, S->Addr + S->Size);
299 LC.Sections.push_back(std::make_unique<Section>(Sec));
300 LC.Sections.back()->Addr = Addr;
301 return Error::success();
302 }
303 }
304
305 // There's no segment named TargetSegName. Create a new load command and
306 // Insert a new section into it.
307 LoadCommand &NewSegment =
308 Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
309 NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
310 NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
311 return Error::success();
312 }
313
314 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
315 // ("<segment>,<section>") and lengths of both segment and section names are
316 // valid.
isValidMachOCannonicalName(StringRef Name)317 static Error isValidMachOCannonicalName(StringRef Name) {
318 if (Name.count(',') != 1)
319 return createStringError(errc::invalid_argument,
320 "invalid section name '%s' (should be formatted "
321 "as '<segment name>,<section name>')",
322 Name.str().c_str());
323
324 std::pair<StringRef, StringRef> Pair = Name.split(',');
325 if (Pair.first.size() > 16)
326 return createStringError(errc::invalid_argument,
327 "too long segment name: '%s'",
328 Pair.first.str().c_str());
329 if (Pair.second.size() > 16)
330 return createStringError(errc::invalid_argument,
331 "too long section name: '%s'",
332 Pair.second.str().c_str());
333 return Error::success();
334 }
335
handleArgs(const CommonConfig & Config,Object & Obj)336 static Error handleArgs(const CommonConfig &Config, Object &Obj) {
337 // Dump sections before add/remove for compatibility with GNU objcopy.
338 for (StringRef Flag : Config.DumpSection) {
339 StringRef SectionName;
340 StringRef FileName;
341 std::tie(SectionName, FileName) = Flag.split('=');
342 if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
343 return E;
344 }
345
346 if (Error E = removeSections(Config, Obj))
347 return E;
348
349 // Mark symbols to determine which symbols are still needed.
350 if (Config.StripAll)
351 markSymbols(Config, Obj);
352
353 updateAndRemoveSymbols(Config, Obj);
354
355 if (Config.StripAll)
356 for (LoadCommand &LC : Obj.LoadCommands)
357 for (std::unique_ptr<Section> &Sec : LC.Sections)
358 Sec->Relocations.clear();
359
360 for (const auto &Flag : Config.AddSection) {
361 std::pair<StringRef, StringRef> SecPair = Flag.split("=");
362 StringRef SecName = SecPair.first;
363 StringRef File = SecPair.second;
364 if (Error E = isValidMachOCannonicalName(SecName))
365 return E;
366 if (Error E = addSection(SecName, File, Obj))
367 return E;
368 }
369
370 if (Error E = processLoadCommands(Config, Obj))
371 return E;
372
373 return Error::success();
374 }
375
executeObjcopyOnBinary(const CommonConfig & Config,const MachOConfig &,object::MachOObjectFile & In,raw_ostream & Out)376 Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
377 const MachOConfig &,
378 object::MachOObjectFile &In,
379 raw_ostream &Out) {
380 MachOReader Reader(In);
381 Expected<std::unique_ptr<Object>> O = Reader.create();
382 if (!O)
383 return createFileError(Config.InputFilename, O.takeError());
384
385 if (Error E = handleArgs(Config, **O))
386 return createFileError(Config.InputFilename, std::move(E));
387
388 // Page size used for alignment of segment sizes in Mach-O executables and
389 // dynamic libraries.
390 uint64_t PageSize;
391 switch (In.getArch()) {
392 case Triple::ArchType::arm:
393 case Triple::ArchType::aarch64:
394 case Triple::ArchType::aarch64_32:
395 PageSize = 16384;
396 break;
397 default:
398 PageSize = 4096;
399 }
400
401 MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
402 if (auto E = Writer.finalize())
403 return E;
404 return Writer.write();
405 }
406
executeObjcopyOnMachOUniversalBinary(const MultiFormatConfig & Config,const MachOUniversalBinary & In,raw_ostream & Out)407 Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
408 const MultiFormatConfig &Config, const MachOUniversalBinary &In,
409 raw_ostream &Out) {
410 SmallVector<OwningBinary<Binary>, 2> Binaries;
411 SmallVector<Slice, 2> Slices;
412 for (const auto &O : In.objects()) {
413 Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
414 if (ArOrErr) {
415 Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
416 createNewArchiveMembers(Config, **ArOrErr);
417 if (!NewArchiveMembersOrErr)
418 return NewArchiveMembersOrErr.takeError();
419 Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
420 writeArchiveToBuffer(*NewArchiveMembersOrErr,
421 (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
422 Config.getCommonConfig().DeterministicArchives,
423 (*ArOrErr)->isThin());
424 if (!OutputBufferOrErr)
425 return OutputBufferOrErr.takeError();
426 Expected<std::unique_ptr<Binary>> BinaryOrErr =
427 object::createBinary(**OutputBufferOrErr);
428 if (!BinaryOrErr)
429 return BinaryOrErr.takeError();
430 Binaries.emplace_back(std::move(*BinaryOrErr),
431 std::move(*OutputBufferOrErr));
432 Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
433 O.getCPUType(), O.getCPUSubType(),
434 O.getArchFlagName(), O.getAlign());
435 continue;
436 }
437 // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
438 // ObjectForArch return an Error in case of the type mismatch. We need to
439 // check each in turn to see what kind of slice this is, so ignore errors
440 // produced along the way.
441 consumeError(ArOrErr.takeError());
442
443 Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
444 if (!ObjOrErr) {
445 consumeError(ObjOrErr.takeError());
446 return createStringError(
447 std::errc::invalid_argument,
448 "slice for '%s' of the universal Mach-O binary "
449 "'%s' is not a Mach-O object or an archive",
450 O.getArchFlagName().c_str(),
451 Config.getCommonConfig().InputFilename.str().c_str());
452 }
453 std::string ArchFlagName = O.getArchFlagName();
454
455 SmallVector<char, 0> Buffer;
456 raw_svector_ostream MemStream(Buffer);
457
458 Expected<const MachOConfig &> MachO = Config.getMachOConfig();
459 if (!MachO)
460 return MachO.takeError();
461
462 if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO,
463 **ObjOrErr, MemStream))
464 return E;
465
466 std::unique_ptr<MemoryBuffer> MB =
467 std::make_unique<SmallVectorMemoryBuffer>(std::move(Buffer),
468 ArchFlagName);
469 Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB);
470 if (!BinaryOrErr)
471 return BinaryOrErr.takeError();
472 Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB));
473 Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
474 O.getAlign());
475 }
476
477 if (Error Err = writeUniversalBinaryToStream(Slices, Out))
478 return Err;
479
480 return Error::success();
481 }
482