1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ObjCopy/MachO/MachOObjcopy.h"
10 #include "Archive.h"
11 #include "MachOReader.h"
12 #include "MachOWriter.h"
13 #include "llvm/ADT/DenseSet.h"
14 #include "llvm/ObjCopy/CommonConfig.h"
15 #include "llvm/ObjCopy/MachO/MachOConfig.h"
16 #include "llvm/ObjCopy/MultiFormatConfig.h"
17 #include "llvm/ObjCopy/ObjCopy.h"
18 #include "llvm/Object/ArchiveWriter.h"
19 #include "llvm/Object/MachOUniversal.h"
20 #include "llvm/Object/MachOUniversalWriter.h"
21 #include "llvm/Support/Errc.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/FileOutputBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/SmallVectorMemoryBuffer.h"
26 
27 using namespace llvm;
28 using namespace llvm::objcopy;
29 using namespace llvm::objcopy::macho;
30 using namespace llvm::object;
31 
32 using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
33 using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
34 
35 #ifndef NDEBUG
36 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
37   // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
38   // LC_LAZY_LOAD_DYLIB
39   return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
40          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
41          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
42          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
43 }
44 #endif
45 
46 static StringRef getPayloadString(const LoadCommand &LC) {
47   assert(isLoadCommandWithPayloadString(LC) &&
48          "unsupported load command encountered");
49 
50   return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
51                    LC.Payload.size())
52       .rtrim('\0');
53 }
54 
55 static Error removeSections(const CommonConfig &Config, Object &Obj) {
56   SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
57     return false;
58   };
59 
60   if (!Config.ToRemove.empty()) {
61     RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
62       return Config.ToRemove.matches(Sec->CanonicalName);
63     };
64   }
65 
66   if (Config.StripAll || Config.StripDebug) {
67     // Remove all debug sections.
68     RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
69       if (Sec->Segname == "__DWARF")
70         return true;
71 
72       return RemovePred(Sec);
73     };
74   }
75 
76   if (!Config.OnlySection.empty()) {
77     // Overwrite RemovePred because --only-section takes priority.
78     RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
79       return !Config.OnlySection.matches(Sec->CanonicalName);
80     };
81   }
82 
83   return Obj.removeSections(RemovePred);
84 }
85 
86 static void markSymbols(const CommonConfig &, Object &Obj) {
87   // Symbols referenced from the indirect symbol table must not be removed.
88   for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
89     if (ISE.Symbol)
90       (*ISE.Symbol)->Referenced = true;
91 }
92 
93 static void updateAndRemoveSymbols(const CommonConfig &Config,
94                                    const MachOConfig &MachOConfig,
95                                    Object &Obj) {
96   for (SymbolEntry &Sym : Obj.SymTable) {
97     auto I = Config.SymbolsToRename.find(Sym.Name);
98     if (I != Config.SymbolsToRename.end())
99       Sym.Name = std::string(I->getValue());
100   }
101 
102   auto RemovePred = [&Config, &MachOConfig,
103                      &Obj](const std::unique_ptr<SymbolEntry> &N) {
104     if (N->Referenced)
105       return false;
106     if (MachOConfig.KeepUndefined && N->isUndefinedSymbol())
107       return false;
108     if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
109       return false;
110     if (Config.StripAll)
111       return true;
112     if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
113       return true;
114     // This behavior is consistent with cctools' strip.
115     if (Config.StripDebug && (N->n_type & MachO::N_STAB))
116       return true;
117     // This behavior is consistent with cctools' strip.
118     if (MachOConfig.StripSwiftSymbols &&
119         (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
120         *Obj.SwiftVersion && N->isSwiftSymbol())
121       return true;
122     return false;
123   };
124 
125   Obj.SymTable.removeSymbols(RemovePred);
126 }
127 
128 template <typename LCType>
129 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
130   assert(isLoadCommandWithPayloadString(LC) &&
131          "unsupported load command encountered");
132 
133   uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
134 
135   LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
136   LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
137   std::copy(S.begin(), S.end(), LC.Payload.begin());
138 }
139 
140 static LoadCommand buildRPathLoadCommand(StringRef Path) {
141   LoadCommand LC;
142   MachO::rpath_command RPathLC;
143   RPathLC.cmd = MachO::LC_RPATH;
144   RPathLC.path = sizeof(MachO::rpath_command);
145   RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
146   LC.MachOLoadCommand.rpath_command_data = RPathLC;
147   LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
148   std::copy(Path.begin(), Path.end(), LC.Payload.begin());
149   return LC;
150 }
151 
152 static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
153   // Remove RPaths.
154   DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(),
155                                      MachOConfig.RPathsToRemove.end());
156 
157   LoadCommandPred RemovePred = [&RPathsToRemove,
158                                 &MachOConfig](const LoadCommand &LC) {
159     if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
160       // When removing all RPaths we don't need to care
161       // about what it contains
162       if (MachOConfig.RemoveAllRpaths)
163         return true;
164 
165       StringRef RPath = getPayloadString(LC);
166       if (RPathsToRemove.count(RPath)) {
167         RPathsToRemove.erase(RPath);
168         return true;
169       }
170     }
171     return false;
172   };
173 
174   if (Error E = Obj.removeLoadCommands(RemovePred))
175     return E;
176 
177   // Emit an error if the Mach-O binary does not contain an rpath path name
178   // specified in -delete_rpath.
179   for (StringRef RPath : MachOConfig.RPathsToRemove) {
180     if (RPathsToRemove.count(RPath))
181       return createStringError(errc::invalid_argument,
182                                "no LC_RPATH load command with path: %s",
183                                RPath.str().c_str());
184   }
185 
186   DenseSet<StringRef> RPaths;
187 
188   // Get all existing RPaths.
189   for (LoadCommand &LC : Obj.LoadCommands) {
190     if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
191       RPaths.insert(getPayloadString(LC));
192   }
193 
194   // Throw errors for invalid RPaths.
195   for (const auto &OldNew : MachOConfig.RPathsToUpdate) {
196     StringRef Old = OldNew.getFirst();
197     StringRef New = OldNew.getSecond();
198     if (!RPaths.contains(Old))
199       return createStringError(errc::invalid_argument,
200                                "no LC_RPATH load command with path: " + Old);
201     if (RPaths.contains(New))
202       return createStringError(errc::invalid_argument,
203                                "rpath '" + New +
204                                    "' would create a duplicate load command");
205   }
206 
207   // Update load commands.
208   for (LoadCommand &LC : Obj.LoadCommands) {
209     switch (LC.MachOLoadCommand.load_command_data.cmd) {
210     case MachO::LC_ID_DYLIB:
211       if (MachOConfig.SharedLibId)
212         updateLoadCommandPayloadString<MachO::dylib_command>(
213             LC, *MachOConfig.SharedLibId);
214       break;
215 
216     case MachO::LC_RPATH: {
217       StringRef RPath = getPayloadString(LC);
218       StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath);
219       if (!NewRPath.empty())
220         updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
221       break;
222     }
223 
224     // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
225     // here once llvm-objcopy supports them.
226     case MachO::LC_LOAD_DYLIB:
227     case MachO::LC_LOAD_WEAK_DYLIB:
228       StringRef InstallName = getPayloadString(LC);
229       StringRef NewInstallName =
230           MachOConfig.InstallNamesToUpdate.lookup(InstallName);
231       if (!NewInstallName.empty())
232         updateLoadCommandPayloadString<MachO::dylib_command>(LC,
233                                                              NewInstallName);
234       break;
235     }
236   }
237 
238   // Add new RPaths.
239   for (StringRef RPath : MachOConfig.RPathToAdd) {
240     if (RPaths.contains(RPath))
241       return createStringError(errc::invalid_argument,
242                                "rpath '" + RPath +
243                                    "' would create a duplicate load command");
244     RPaths.insert(RPath);
245     Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
246   }
247 
248   for (StringRef RPath : MachOConfig.RPathToPrepend) {
249     if (RPaths.contains(RPath))
250       return createStringError(errc::invalid_argument,
251                                "rpath '" + RPath +
252                                    "' would create a duplicate load command");
253 
254     RPaths.insert(RPath);
255     Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
256                             buildRPathLoadCommand(RPath));
257   }
258 
259   // Unlike appending rpaths, the indexes of subsequent load commands must
260   // be recalculated after prepending one.
261   if (!MachOConfig.RPathToPrepend.empty())
262     Obj.updateLoadCommandIndexes();
263 
264   // Remove any empty segments if required.
265   if (!MachOConfig.EmptySegmentsToRemove.empty()) {
266     auto RemovePred = [&MachOConfig](const LoadCommand &LC) {
267       if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 ||
268           LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) {
269         return LC.Sections.empty() &&
270                MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName());
271       }
272       return false;
273     };
274     if (Error E = Obj.removeLoadCommands(RemovePred))
275       return E;
276   }
277 
278   return Error::success();
279 }
280 
281 static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
282                                Object &Obj) {
283   for (LoadCommand &LC : Obj.LoadCommands)
284     for (const std::unique_ptr<Section> &Sec : LC.Sections) {
285       if (Sec->CanonicalName == SecName) {
286         Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
287             FileOutputBuffer::create(Filename, Sec->Content.size());
288         if (!BufferOrErr)
289           return BufferOrErr.takeError();
290         std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
291         llvm::copy(Sec->Content, Buf->getBufferStart());
292 
293         if (Error E = Buf->commit())
294           return E;
295         return Error::success();
296       }
297     }
298 
299   return createStringError(object_error::parse_failed, "section '%s' not found",
300                            SecName.str().c_str());
301 }
302 
303 static Error addSection(const NewSectionInfo &NewSection, Object &Obj) {
304   std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(',');
305   StringRef TargetSegName = Pair.first;
306   Section Sec(TargetSegName, Pair.second);
307   Sec.Content =
308       Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer());
309   Sec.Size = Sec.Content.size();
310 
311   // Add the a section into an existing segment.
312   for (LoadCommand &LC : Obj.LoadCommands) {
313     std::optional<StringRef> SegName = LC.getSegmentName();
314     if (SegName && SegName == TargetSegName) {
315       uint64_t Addr = *LC.getSegmentVMAddr();
316       for (const std::unique_ptr<Section> &S : LC.Sections)
317         Addr = std::max(Addr, S->Addr + S->Size);
318       LC.Sections.push_back(std::make_unique<Section>(Sec));
319       LC.Sections.back()->Addr = Addr;
320       return Error::success();
321     }
322   }
323 
324   // There's no segment named TargetSegName. Create a new load command and
325   // Insert a new section into it.
326   LoadCommand &NewSegment =
327       Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
328   NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
329   NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
330   return Error::success();
331 }
332 
333 static Expected<Section &> findSection(StringRef SecName, Object &O) {
334   StringRef SegName;
335   std::tie(SegName, SecName) = SecName.split(",");
336   auto FoundSeg =
337       llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) {
338         return LC.getSegmentName() == SegName;
339       });
340   if (FoundSeg == O.LoadCommands.end())
341     return createStringError(errc::invalid_argument,
342                              "could not find segment with name '%s'",
343                              SegName.str().c_str());
344   auto FoundSec = llvm::find_if(FoundSeg->Sections,
345                                 [SecName](const std::unique_ptr<Section> &Sec) {
346                                   return Sec->Sectname == SecName;
347                                 });
348   if (FoundSec == FoundSeg->Sections.end())
349     return createStringError(errc::invalid_argument,
350                              "could not find section with name '%s'",
351                              SecName.str().c_str());
352 
353   assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str());
354   return **FoundSec;
355 }
356 
357 static Error updateSection(const NewSectionInfo &NewSection, Object &O) {
358   Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O);
359 
360   if (!SecToUpdateOrErr)
361     return SecToUpdateOrErr.takeError();
362   Section &Sec = *SecToUpdateOrErr;
363 
364   if (NewSection.SectionData->getBufferSize() > Sec.Size)
365     return createStringError(
366         errc::invalid_argument,
367         "new section cannot be larger than previous section");
368   Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer());
369   Sec.Size = Sec.Content.size();
370   return Error::success();
371 }
372 
373 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
374 // ("<segment>,<section>") and lengths of both segment and section names are
375 // valid.
376 static Error isValidMachOCannonicalName(StringRef Name) {
377   if (Name.count(',') != 1)
378     return createStringError(errc::invalid_argument,
379                              "invalid section name '%s' (should be formatted "
380                              "as '<segment name>,<section name>')",
381                              Name.str().c_str());
382 
383   std::pair<StringRef, StringRef> Pair = Name.split(',');
384   if (Pair.first.size() > 16)
385     return createStringError(errc::invalid_argument,
386                              "too long segment name: '%s'",
387                              Pair.first.str().c_str());
388   if (Pair.second.size() > 16)
389     return createStringError(errc::invalid_argument,
390                              "too long section name: '%s'",
391                              Pair.second.str().c_str());
392   return Error::success();
393 }
394 
395 static Error handleArgs(const CommonConfig &Config,
396                         const MachOConfig &MachOConfig, Object &Obj) {
397   // Dump sections before add/remove for compatibility with GNU objcopy.
398   for (StringRef Flag : Config.DumpSection) {
399     StringRef SectionName;
400     StringRef FileName;
401     std::tie(SectionName, FileName) = Flag.split('=');
402     if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
403       return E;
404   }
405 
406   if (Error E = removeSections(Config, Obj))
407     return E;
408 
409   // Mark symbols to determine which symbols are still needed.
410   if (Config.StripAll)
411     markSymbols(Config, Obj);
412 
413   updateAndRemoveSymbols(Config, MachOConfig, Obj);
414 
415   if (Config.StripAll)
416     for (LoadCommand &LC : Obj.LoadCommands)
417       for (std::unique_ptr<Section> &Sec : LC.Sections)
418         Sec->Relocations.clear();
419 
420   for (const NewSectionInfo &NewSection : Config.AddSection) {
421     if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
422       return E;
423     if (Error E = addSection(NewSection, Obj))
424       return E;
425   }
426 
427   for (const NewSectionInfo &NewSection : Config.UpdateSection) {
428     if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
429       return E;
430     if (Error E = updateSection(NewSection, Obj))
431       return E;
432   }
433 
434   if (Error E = processLoadCommands(MachOConfig, Obj))
435     return E;
436 
437   return Error::success();
438 }
439 
440 Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
441                                              const MachOConfig &MachOConfig,
442                                              object::MachOObjectFile &In,
443                                              raw_ostream &Out) {
444   MachOReader Reader(In);
445   Expected<std::unique_ptr<Object>> O = Reader.create();
446   if (!O)
447     return createFileError(Config.InputFilename, O.takeError());
448 
449   if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD)
450     return createStringError(std::errc::not_supported,
451                              "%s: MH_PRELOAD files are not supported",
452                              Config.InputFilename.str().c_str());
453 
454   if (Error E = handleArgs(Config, MachOConfig, **O))
455     return createFileError(Config.InputFilename, std::move(E));
456 
457   // Page size used for alignment of segment sizes in Mach-O executables and
458   // dynamic libraries.
459   uint64_t PageSize;
460   switch (In.getArch()) {
461   case Triple::ArchType::arm:
462   case Triple::ArchType::aarch64:
463   case Triple::ArchType::aarch64_32:
464     PageSize = 16384;
465     break;
466   default:
467     PageSize = 4096;
468   }
469 
470   MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(),
471                      sys::path::filename(Config.OutputFilename), PageSize, Out);
472   if (auto E = Writer.finalize())
473     return E;
474   return Writer.write();
475 }
476 
477 Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
478     const MultiFormatConfig &Config, const MachOUniversalBinary &In,
479     raw_ostream &Out) {
480   SmallVector<OwningBinary<Binary>, 2> Binaries;
481   SmallVector<Slice, 2> Slices;
482   for (const auto &O : In.objects()) {
483     Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
484     if (ArOrErr) {
485       Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
486           createNewArchiveMembers(Config, **ArOrErr);
487       if (!NewArchiveMembersOrErr)
488         return NewArchiveMembersOrErr.takeError();
489       auto Kind = (*ArOrErr)->kind();
490       if (Kind == object::Archive::K_BSD)
491         Kind = object::Archive::K_DARWIN;
492       Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
493           writeArchiveToBuffer(*NewArchiveMembersOrErr,
494                                (*ArOrErr)->hasSymbolTable(), Kind,
495                                Config.getCommonConfig().DeterministicArchives,
496                                (*ArOrErr)->isThin());
497       if (!OutputBufferOrErr)
498         return OutputBufferOrErr.takeError();
499       Expected<std::unique_ptr<Binary>> BinaryOrErr =
500           object::createBinary(**OutputBufferOrErr);
501       if (!BinaryOrErr)
502         return BinaryOrErr.takeError();
503       Binaries.emplace_back(std::move(*BinaryOrErr),
504                             std::move(*OutputBufferOrErr));
505       Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
506                           O.getCPUType(), O.getCPUSubType(),
507                           O.getArchFlagName(), O.getAlign());
508       continue;
509     }
510     // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
511     // ObjectForArch return an Error in case of the type mismatch. We need to
512     // check each in turn to see what kind of slice this is, so ignore errors
513     // produced along the way.
514     consumeError(ArOrErr.takeError());
515 
516     Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
517     if (!ObjOrErr) {
518       consumeError(ObjOrErr.takeError());
519       return createStringError(
520           std::errc::invalid_argument,
521           "slice for '%s' of the universal Mach-O binary "
522           "'%s' is not a Mach-O object or an archive",
523           O.getArchFlagName().c_str(),
524           Config.getCommonConfig().InputFilename.str().c_str());
525     }
526     std::string ArchFlagName = O.getArchFlagName();
527 
528     SmallVector<char, 0> Buffer;
529     raw_svector_ostream MemStream(Buffer);
530 
531     Expected<const MachOConfig &> MachO = Config.getMachOConfig();
532     if (!MachO)
533       return MachO.takeError();
534 
535     if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO,
536                                          **ObjOrErr, MemStream))
537       return E;
538 
539     auto MB = std::make_unique<SmallVectorMemoryBuffer>(
540         std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false);
541     Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB);
542     if (!BinaryOrErr)
543       return BinaryOrErr.takeError();
544     Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB));
545     Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
546                         O.getAlign());
547   }
548 
549   if (Error Err = writeUniversalBinaryToStream(Slices, Out))
550     return Err;
551 
552   return Error::success();
553 }
554