1 //===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 ///
10 /// \file For mach-o object files, this implementation converts normalized
11 /// mach-o in memory to mach-o binary on disk.
12 ///
13 ///                 +---------------+
14 ///                 | binary mach-o |
15 ///                 +---------------+
16 ///                        ^
17 ///                        |
18 ///                        |
19 ///                  +------------+
20 ///                  | normalized |
21 ///                  +------------+
22 
23 #include "MachONormalizedFile.h"
24 #include "MachONormalizedFileBinaryUtils.h"
25 #include "lld/Common/LLVM.h"
26 #include "lld/Core/Error.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/ADT/ilist.h"
31 #include "llvm/ADT/ilist_node.h"
32 #include "llvm/BinaryFormat/MachO.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/FileOutputBuffer.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/Host.h"
40 #include "llvm/Support/MemoryBuffer.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <functional>
43 #include <list>
44 #include <map>
45 #include <system_error>
46 
47 using namespace llvm::MachO;
48 
49 namespace lld {
50 namespace mach_o {
51 namespace normalized {
52 
53 struct TrieNode; // Forward declaration.
54 
55 struct TrieEdge : public llvm::ilist_node<TrieEdge> {
TrieEdgelld::mach_o::normalized::TrieEdge56   TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {}
57 
58   StringRef          _subString;
59   struct TrieNode   *_child;
60 };
61 
62 } // namespace normalized
63 } // namespace mach_o
64 } // namespace lld
65 
66 
namespace llvm {
using lld::mach_o::normalized::TrieEdge;
// Specialize the ilist allocation traits for TrieEdge so that they derive
// from ilist_noalloc_traits instead of the default traits.
// NOTE(review): presumably because edges are placed in a BumpPtrAllocator
// (TrieNode::addSymbol takes one) and must not be freed by the list -- confirm.
template <>
struct ilist_alloc_traits<TrieEdge> : ilist_noalloc_traits<TrieEdge> {};
} // namespace llvm
72 
73 
74 namespace lld {
75 namespace mach_o {
76 namespace normalized {
77 
78 struct TrieNode {
79   typedef llvm::ilist<TrieEdge> TrieEdgeList;
80 
TrieNodelld::mach_o::normalized::TrieNode81   TrieNode(StringRef s)
82       : _cummulativeString(s), _address(0), _flags(0), _other(0),
83         _trieOffset(0), _hasExportInfo(false) {}
84   ~TrieNode() = default;
85 
86   void addSymbol(const Export &entry, BumpPtrAllocator &allocator,
87                  std::vector<TrieNode *> &allNodes);
88 
89   void addOrderedNodes(const Export &entry,
90                        std::vector<TrieNode *> &allNodes);
91   bool updateOffset(uint32_t &offset);
92   void appendToByteBuffer(ByteBuffer &out);
93 
94 private:
95   StringRef                 _cummulativeString;
96   TrieEdgeList              _children;
97   uint64_t                  _address;
98   uint64_t                  _flags;
99   uint64_t                  _other;
100   StringRef                 _importedName;
101   uint32_t                  _trieOffset;
102   bool                      _hasExportInfo;
103   bool                      _ordered = false;
104 };
105 
106 /// Utility class for writing a mach-o binary file given an in-memory
107 /// normalized file.
108 class MachOFileLayout {
109 public:
110   /// All layout computation is done in the constructor.
111   MachOFileLayout(const NormalizedFile &file, bool alwaysIncludeFunctionStarts);
112 
113   /// Returns the final file size as computed in the constructor.
114   size_t      size() const;
115 
116   // Returns size of the mach_header and load commands.
117   size_t      headerAndLoadCommandsSize() const;
118 
119   /// Writes the normalized file as a binary mach-o file to the specified
120   /// path.  This does not have a stream interface because the generated
121   /// file may need the 'x' bit set.
122   llvm::Error writeBinary(StringRef path);
123 
124 private:
125   uint32_t    loadCommandsSize(uint32_t &count,
126                                bool alwaysIncludeFunctionStarts);
127   void        buildFileOffsets();
128   void        writeMachHeader();
129   llvm::Error writeLoadCommands();
130   void        writeSectionContent();
131   void        writeRelocations();
132   void        writeSymbolTable();
133   void        writeRebaseInfo();
134   void        writeBindingInfo();
135   void        writeLazyBindingInfo();
136   void        writeExportInfo();
137   void        writeFunctionStartsInfo();
138   void        writeDataInCodeInfo();
139   void        writeLinkEditContent();
140   void        buildLinkEditInfo();
141   void        buildRebaseInfo();
142   void        buildBindInfo();
143   void        buildLazyBindInfo();
144   void        buildExportTrie();
145   void        computeFunctionStartsSize();
146   void        computeDataInCodeSize();
147   void        computeSymbolTableSizes();
148   void        buildSectionRelocations();
149   void        appendSymbols(const std::vector<Symbol> &symbols,
150                                       uint32_t &symOffset, uint32_t &strOffset);
151   uint32_t    indirectSymbolIndex(const Section &sect, uint32_t &index);
152   uint32_t    indirectSymbolElementSize(const Section &sect);
153 
154   // For use as template parameter to load command methods.
155   struct MachO64Trait {
156     typedef llvm::MachO::segment_command_64 command;
157     typedef llvm::MachO::section_64         section;
158     enum { LC = llvm::MachO::LC_SEGMENT_64 };
159   };
160 
161   // For use as template parameter to load command methods.
162   struct MachO32Trait {
163     typedef llvm::MachO::segment_command   command;
164     typedef llvm::MachO::section           section;
165     enum { LC = llvm::MachO::LC_SEGMENT };
166   };
167 
168   template <typename T>
169   llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc);
170   template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc);
171 
172   uint32_t pointerAlign(uint32_t value);
173   static StringRef dyldPath();
174 
175   struct SegExtraInfo {
176     uint32_t                    fileOffset;
177     uint32_t                    fileSize;
178     std::vector<const Section*> sections;
179   };
180   typedef std::map<const Segment*, SegExtraInfo> SegMap;
181   struct SectionExtraInfo {
182     uint32_t                    fileOffset;
183   };
184   typedef std::map<const Section*, SectionExtraInfo> SectionMap;
185 
186   const NormalizedFile &_file;
187   std::error_code _ec;
188   uint8_t              *_buffer;
189   const bool            _is64;
190   const bool            _swap;
191   const bool            _bigEndianArch;
192   uint64_t              _seg1addr;
193   uint32_t              _startOfLoadCommands;
194   uint32_t              _countOfLoadCommands;
195   uint32_t              _endOfLoadCommands;
196   uint32_t              _startOfRelocations;
197   uint32_t              _startOfFunctionStarts;
198   uint32_t              _startOfDataInCode;
199   uint32_t              _startOfSymbols;
200   uint32_t              _startOfIndirectSymbols;
201   uint32_t              _startOfSymbolStrings;
202   uint32_t              _endOfSymbolStrings;
203   uint32_t              _symbolTableLocalsStartIndex;
204   uint32_t              _symbolTableGlobalsStartIndex;
205   uint32_t              _symbolTableUndefinesStartIndex;
206   uint32_t              _symbolStringPoolSize;
207   uint32_t              _symbolTableSize;
208   uint32_t              _functionStartsSize;
209   uint32_t              _dataInCodeSize;
210   uint32_t              _indirectSymbolTableCount;
211   // Used in object file creation only
212   uint32_t              _startOfSectionsContent;
213   uint32_t              _endOfSectionsContent;
214   // Used in final linked image only
215   uint32_t              _startOfLinkEdit;
216   uint32_t              _startOfRebaseInfo;
217   uint32_t              _endOfRebaseInfo;
218   uint32_t              _startOfBindingInfo;
219   uint32_t              _endOfBindingInfo;
220   uint32_t              _startOfLazyBindingInfo;
221   uint32_t              _endOfLazyBindingInfo;
222   uint32_t              _startOfExportTrie;
223   uint32_t              _endOfExportTrie;
224   uint32_t              _endOfLinkEdit;
225   uint64_t              _addressOfLinkEdit;
226   SegMap                _segInfo;
227   SectionMap            _sectInfo;
228   ByteBuffer            _rebaseInfo;
229   ByteBuffer            _bindingInfo;
230   ByteBuffer            _lazyBindingInfo;
231   ByteBuffer            _weakBindingInfo;
232   ByteBuffer            _exportTrie;
233 };
234 
headerAndLoadCommandsSize(const NormalizedFile & file,bool includeFunctionStarts)235 size_t headerAndLoadCommandsSize(const NormalizedFile &file,
236                                  bool includeFunctionStarts) {
237   MachOFileLayout layout(file, includeFunctionStarts);
238   return layout.headerAndLoadCommandsSize();
239 }
240 
dyldPath()241 StringRef MachOFileLayout::dyldPath() {
242   return "/usr/lib/dyld";
243 }
244 
pointerAlign(uint32_t value)245 uint32_t MachOFileLayout::pointerAlign(uint32_t value) {
246   return llvm::alignTo(value, _is64 ? 8 : 4);
247 }
248 
249 
headerAndLoadCommandsSize() const250 size_t MachOFileLayout::headerAndLoadCommandsSize() const {
251   return _endOfLoadCommands;
252 }
253 
MachOFileLayout(const NormalizedFile & file,bool alwaysIncludeFunctionStarts)254 MachOFileLayout::MachOFileLayout(const NormalizedFile &file,
255                                  bool alwaysIncludeFunctionStarts)
256     : _file(file),
257       _is64(MachOLinkingContext::is64Bit(file.arch)),
258       _swap(!MachOLinkingContext::isHostEndian(file.arch)),
259       _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)),
260       _seg1addr(INT64_MAX) {
261   _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header);
262   const size_t segCommandBaseSize =
263           (_is64 ? sizeof(segment_command_64) : sizeof(segment_command));
264   const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section));
265   if (file.fileType == llvm::MachO::MH_OBJECT) {
266     // object files have just one segment load command containing all sections
267     _endOfLoadCommands = _startOfLoadCommands
268                                + segCommandBaseSize
269                                + file.sections.size() * sectsSize
270                                + sizeof(symtab_command);
271     _countOfLoadCommands = 2;
272     if (file.hasMinVersionLoadCommand) {
273       _endOfLoadCommands += sizeof(version_min_command);
274       _countOfLoadCommands++;
275     }
276     if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) {
277       _endOfLoadCommands += sizeof(linkedit_data_command);
278       _countOfLoadCommands++;
279     }
280     if (_file.generateDataInCodeLoadCommand) {
281       _endOfLoadCommands += sizeof(linkedit_data_command);
282       _countOfLoadCommands++;
283     }
284     // Assign file offsets to each section.
285     _startOfSectionsContent = _endOfLoadCommands;
286     unsigned relocCount = 0;
287     uint64_t offset = _startOfSectionsContent;
288     for (const Section &sect : file.sections) {
289       if (isZeroFillSection(sect.type))
290         _sectInfo[&sect].fileOffset = 0;
291       else {
292         offset = llvm::alignTo(offset, sect.alignment);
293         _sectInfo[&sect].fileOffset = offset;
294         offset += sect.content.size();
295       }
296       relocCount += sect.relocations.size();
297     }
298     _endOfSectionsContent = offset;
299 
300     computeSymbolTableSizes();
301     computeFunctionStartsSize();
302     computeDataInCodeSize();
303 
304     // Align start of relocations.
305     _startOfRelocations = pointerAlign(_endOfSectionsContent);
306     _startOfFunctionStarts = _startOfRelocations + relocCount * 8;
307     _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
308     _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
309     // Add Indirect symbol table.
310     _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
311     // Align start of symbol table and symbol strings.
312     _startOfSymbolStrings = _startOfIndirectSymbols
313                   + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
314     _endOfSymbolStrings = _startOfSymbolStrings
315                           + pointerAlign(_symbolStringPoolSize);
316     _endOfLinkEdit = _endOfSymbolStrings;
317     DEBUG_WITH_TYPE("MachOFileLayout",
318                   llvm::dbgs() << "MachOFileLayout()\n"
319       << "  startOfLoadCommands=" << _startOfLoadCommands << "\n"
320       << "  countOfLoadCommands=" << _countOfLoadCommands << "\n"
321       << "  endOfLoadCommands=" << _endOfLoadCommands << "\n"
322       << "  startOfRelocations=" << _startOfRelocations << "\n"
323       << "  startOfSymbols=" << _startOfSymbols << "\n"
324       << "  startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
325       << "  endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
326       << "  startOfSectionsContent=" << _startOfSectionsContent << "\n"
327       << "  endOfSectionsContent=" << _endOfSectionsContent << "\n");
328   } else {
329     // Final linked images have one load command per segment.
330     _endOfLoadCommands = _startOfLoadCommands
331                           + loadCommandsSize(_countOfLoadCommands,
332                                              alwaysIncludeFunctionStarts);
333 
334     // Assign section file offsets.
335     buildFileOffsets();
336     buildLinkEditInfo();
337 
338     // LINKEDIT of final linked images has in order:
339     // rebase info, binding info, lazy binding info, weak binding info,
340     // data-in-code, symbol table, indirect symbol table, symbol table strings.
341     _startOfRebaseInfo = _startOfLinkEdit;
342     _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size();
343     _startOfBindingInfo = _endOfRebaseInfo;
344     _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size();
345     _startOfLazyBindingInfo = _endOfBindingInfo;
346     _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size();
347     _startOfExportTrie = _endOfLazyBindingInfo;
348     _endOfExportTrie = _startOfExportTrie + _exportTrie.size();
349     _startOfFunctionStarts = _endOfExportTrie;
350     _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
351     _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
352     _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
353     _startOfSymbolStrings = _startOfIndirectSymbols
354                   + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
355     _endOfSymbolStrings = _startOfSymbolStrings
356                           + pointerAlign(_symbolStringPoolSize);
357     _endOfLinkEdit = _endOfSymbolStrings;
358     DEBUG_WITH_TYPE("MachOFileLayout",
359                   llvm::dbgs() << "MachOFileLayout()\n"
360       << "  startOfLoadCommands=" << _startOfLoadCommands << "\n"
361       << "  countOfLoadCommands=" << _countOfLoadCommands << "\n"
362       << "  endOfLoadCommands=" << _endOfLoadCommands << "\n"
363       << "  startOfLinkEdit=" << _startOfLinkEdit << "\n"
364       << "  startOfRebaseInfo=" << _startOfRebaseInfo << "\n"
365       << "  endOfRebaseInfo=" << _endOfRebaseInfo << "\n"
366       << "  startOfBindingInfo=" << _startOfBindingInfo << "\n"
367       << "  endOfBindingInfo=" << _endOfBindingInfo << "\n"
368       << "  startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n"
369       << "  endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n"
370       << "  startOfExportTrie=" << _startOfExportTrie << "\n"
371       << "  endOfExportTrie=" << _endOfExportTrie << "\n"
372       << "  startOfFunctionStarts=" << _startOfFunctionStarts << "\n"
373       << "  startOfDataInCode=" << _startOfDataInCode << "\n"
374       << "  startOfSymbols=" << _startOfSymbols << "\n"
375       << "  startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
376       << "  endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
377       << "  addressOfLinkEdit=" << _addressOfLinkEdit << "\n");
378   }
379 }
380 
loadCommandsSize(uint32_t & count,bool alwaysIncludeFunctionStarts)381 uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count,
382                                            bool alwaysIncludeFunctionStarts) {
383   uint32_t size = 0;
384   count = 0;
385 
386   const size_t segCommandSize =
387           (_is64 ? sizeof(segment_command_64) : sizeof(segment_command));
388   const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section));
389 
390   // Add LC_SEGMENT for each segment.
391   size += _file.segments.size() * segCommandSize;
392   count += _file.segments.size();
393   // Add section record for each section.
394   size += _file.sections.size() * sectionSize;
395 
396   // If creating a dylib, add LC_ID_DYLIB.
397   if (_file.fileType == llvm::MachO::MH_DYLIB) {
398     size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1);
399     ++count;
400   }
401 
402   // Add LC_DYLD_INFO
403   size += sizeof(dyld_info_command);
404   ++count;
405 
406   // Add LC_SYMTAB
407   size += sizeof(symtab_command);
408   ++count;
409 
410   // Add LC_DYSYMTAB
411   if (_file.fileType != llvm::MachO::MH_PRELOAD) {
412     size += sizeof(dysymtab_command);
413     ++count;
414   }
415 
416   // If main executable add LC_LOAD_DYLINKER
417   if (_file.fileType == llvm::MachO::MH_EXECUTE) {
418     size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1);
419     ++count;
420   }
421 
422   // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS,
423   // LC_VERSION_MIN_TVOS
424   if (_file.hasMinVersionLoadCommand) {
425     size += sizeof(version_min_command);
426     ++count;
427   }
428 
429   // Add LC_SOURCE_VERSION
430   size += sizeof(source_version_command);
431   ++count;
432 
433   // If main executable add LC_MAIN
434   if (_file.fileType == llvm::MachO::MH_EXECUTE) {
435     size += sizeof(entry_point_command);
436     ++count;
437   }
438 
439   // Add LC_LOAD_DYLIB for each dependent dylib.
440   for (const DependentDylib &dep : _file.dependentDylibs) {
441     size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1);
442     ++count;
443   }
444 
445   // Add LC_RPATH
446   for (const StringRef &path : _file.rpaths) {
447     size += pointerAlign(sizeof(rpath_command) + path.size() + 1);
448     ++count;
449   }
450 
451   // Add LC_FUNCTION_STARTS if needed
452   if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) {
453     size += sizeof(linkedit_data_command);
454     ++count;
455   }
456 
457   // Add LC_DATA_IN_CODE if requested.  Note, we do encode zero length entries.
458   // FIXME: Zero length entries is only to match ld64.  Should we change this?
459   if (_file.generateDataInCodeLoadCommand) {
460     size += sizeof(linkedit_data_command);
461     ++count;
462   }
463 
464   return size;
465 }
466 
overlaps(const Segment & s1,const Segment & s2)467 static bool overlaps(const Segment &s1, const Segment &s2) {
468   if (s2.address >= s1.address+s1.size)
469     return false;
470   if (s1.address >= s2.address+s2.size)
471     return false;
472   return true;
473 }
474 
overlaps(const Section & s1,const Section & s2)475 static bool overlaps(const Section &s1, const Section &s2) {
476   if (s2.address >= s1.address+s1.content.size())
477     return false;
478   if (s1.address >= s2.address+s2.content.size())
479     return false;
480   return true;
481 }
482 
buildFileOffsets()483 void MachOFileLayout::buildFileOffsets() {
484   // Verify no segments overlap
485   for (const Segment &sg1 : _file.segments) {
486     for (const Segment &sg2 : _file.segments) {
487       if (&sg1 == &sg2)
488         continue;
489       if (overlaps(sg1,sg2)) {
490         _ec = make_error_code(llvm::errc::executable_format_error);
491         return;
492       }
493     }
494   }
495 
496   // Verify no sections overlap
497   for (const Section &s1 : _file.sections) {
498     for (const Section &s2 : _file.sections) {
499       if (&s1 == &s2)
500         continue;
501       if (overlaps(s1,s2)) {
502         _ec = make_error_code(llvm::errc::executable_format_error);
503         return;
504       }
505     }
506   }
507 
508   // Build side table of extra info about segments and sections.
509   SegExtraInfo t;
510   t.fileOffset = 0;
511   for (const Segment &sg : _file.segments) {
512     _segInfo[&sg] = t;
513   }
514   SectionExtraInfo t2;
515   t2.fileOffset = 0;
516   // Assign sections to segments.
517   for (const Section &s : _file.sections) {
518     _sectInfo[&s] = t2;
519     bool foundSegment = false;
520     for (const Segment &sg : _file.segments) {
521       if (sg.name.equals(s.segmentName)) {
522         if ((s.address >= sg.address)
523                         && (s.address+s.content.size() <= sg.address+sg.size)) {
524           _segInfo[&sg].sections.push_back(&s);
525           foundSegment = true;
526           break;
527         }
528       }
529     }
530     if (!foundSegment) {
531       _ec = make_error_code(llvm::errc::executable_format_error);
532       return;
533     }
534   }
535 
536   // Assign file offsets.
537   uint32_t fileOffset = 0;
538   DEBUG_WITH_TYPE("MachOFileLayout",
539                   llvm::dbgs() << "buildFileOffsets()\n");
540   for (const Segment &sg : _file.segments) {
541     _segInfo[&sg].fileOffset = fileOffset;
542     if ((_seg1addr == INT64_MAX) && sg.init_access)
543       _seg1addr = sg.address;
544     DEBUG_WITH_TYPE("MachOFileLayout",
545                   llvm::dbgs() << "  segment=" << sg.name
546                   << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n");
547 
548     uint32_t segFileSize = 0;
549     // A segment that is not zero-fill must use a least one page of disk space.
550     if (sg.init_access)
551       segFileSize = _file.pageSize;
552     for (const Section *s : _segInfo[&sg].sections) {
553       uint32_t sectOffset = s->address - sg.address;
554       uint32_t sectFileSize =
555         isZeroFillSection(s->type) ? 0 : s->content.size();
556       segFileSize = std::max(segFileSize, sectOffset + sectFileSize);
557 
558       _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset;
559       DEBUG_WITH_TYPE("MachOFileLayout",
560                   llvm::dbgs() << "    section=" << s->sectionName
561                   << ", fileOffset=" << fileOffset << "\n");
562     }
563 
564     // round up all segments to page aligned, except __LINKEDIT
565     if (!sg.name.equals("__LINKEDIT")) {
566       _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize);
567       fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize);
568     }
569     _addressOfLinkEdit = sg.address + sg.size;
570   }
571   _startOfLinkEdit = fileOffset;
572 }
573 
size() const574 size_t MachOFileLayout::size() const {
575   return _endOfSymbolStrings;
576 }
577 
writeMachHeader()578 void MachOFileLayout::writeMachHeader() {
579   auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch);
580   // dynamic x86 executables on newer OS version should also set the
581   // CPU_SUBTYPE_LIB64 mask in the CPU subtype.
582   // FIXME: Check that this is a dynamic executable, not a static one.
583   if (_file.fileType == llvm::MachO::MH_EXECUTE &&
584       cpusubtype == CPU_SUBTYPE_X86_64_ALL &&
585       _file.os == MachOLinkingContext::OS::macOSX) {
586     uint32_t version;
587     bool failed = MachOLinkingContext::parsePackedVersion("10.5", version);
588     if (!failed && _file.minOSverson >= version)
589       cpusubtype |= CPU_SUBTYPE_LIB64;
590   }
591 
592   mach_header *mh = reinterpret_cast<mach_header*>(_buffer);
593   mh->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC;
594   mh->cputype =  MachOLinkingContext::cpuTypeFromArch(_file.arch);
595   mh->cpusubtype = cpusubtype;
596   mh->filetype = _file.fileType;
597   mh->ncmds = _countOfLoadCommands;
598   mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands;
599   mh->flags = _file.flags;
600   if (_swap)
601     swapStruct(*mh);
602 }
603 
indirectSymbolIndex(const Section & sect,uint32_t & index)604 uint32_t MachOFileLayout::indirectSymbolIndex(const Section &sect,
605                                                    uint32_t &index) {
606   if (sect.indirectSymbols.empty())
607     return 0;
608   uint32_t result = index;
609   index += sect.indirectSymbols.size();
610   return result;
611 }
612 
indirectSymbolElementSize(const Section & sect)613 uint32_t MachOFileLayout::indirectSymbolElementSize(const Section &sect) {
614   if (sect.indirectSymbols.empty())
615     return 0;
616   if (sect.type != S_SYMBOL_STUBS)
617     return 0;
618   return sect.content.size() / sect.indirectSymbols.size();
619 }
620 
621 template <typename T>
writeSingleSegmentLoadCommand(uint8_t * & lc)622 llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) {
623   typename T::command* seg = reinterpret_cast<typename T::command*>(lc);
624   seg->cmd = T::LC;
625   seg->cmdsize = sizeof(typename T::command)
626                           + _file.sections.size() * sizeof(typename T::section);
627   uint8_t *next = lc + seg->cmdsize;
628   memset(seg->segname, 0, 16);
629   seg->flags = 0;
630   seg->vmaddr = 0;
631   seg->fileoff = _endOfLoadCommands;
632   seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
633   seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
634   seg->nsects = _file.sections.size();
635   if (seg->nsects) {
636     seg->vmsize = _file.sections.back().address
637                 + _file.sections.back().content.size();
638     seg->filesize = _sectInfo[&_file.sections.back()].fileOffset +
639                     _file.sections.back().content.size() -
640                     _sectInfo[&_file.sections.front()].fileOffset;
641   }
642   if (_swap)
643     swapStruct(*seg);
644   typename T::section *sout = reinterpret_cast<typename T::section*>
645                                               (lc+sizeof(typename T::command));
646   uint32_t relOffset = _startOfRelocations;
647   uint32_t indirectSymRunningIndex = 0;
648   for (const Section &sin : _file.sections) {
649     setString16(sin.sectionName, sout->sectname);
650     setString16(sin.segmentName, sout->segname);
651     sout->addr = sin.address;
652     sout->size = sin.content.size();
653     sout->offset = _sectInfo[&sin].fileOffset;
654     sout->align = llvm::Log2_32(sin.alignment);
655     sout->reloff = sin.relocations.empty() ? 0 : relOffset;
656     sout->nreloc = sin.relocations.size();
657     sout->flags = sin.type | sin.attributes;
658     sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex);
659     sout->reserved2 = indirectSymbolElementSize(sin);
660     relOffset += sin.relocations.size() * sizeof(any_relocation_info);
661     if (_swap)
662       swapStruct(*sout);
663     ++sout;
664   }
665   lc = next;
666   return llvm::Error::success();
667 }
668 
669 template <typename T>
writeSegmentLoadCommands(uint8_t * & lc)670 llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) {
671   uint32_t indirectSymRunningIndex = 0;
672   for (const Segment &seg : _file.segments) {
673     // Link edit has no sections and a custom range of address, so handle it
674     // specially.
675     SegExtraInfo &segInfo = _segInfo[&seg];
676     if (seg.name.equals("__LINKEDIT")) {
677       size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit;
678       typename T::command* cmd = reinterpret_cast<typename T::command*>(lc);
679       cmd->cmd = T::LC;
680       cmd->cmdsize = sizeof(typename T::command);
681       uint8_t *next = lc + cmd->cmdsize;
682       setString16("__LINKEDIT", cmd->segname);
683       cmd->vmaddr   = _addressOfLinkEdit;
684       cmd->vmsize   = llvm::alignTo(linkeditSize, _file.pageSize);
685       cmd->fileoff  = _startOfLinkEdit;
686       cmd->filesize = linkeditSize;
687       cmd->initprot = seg.init_access;
688       cmd->maxprot  = seg.max_access;
689       cmd->nsects   = 0;
690       cmd->flags    = 0;
691       if (_swap)
692         swapStruct(*cmd);
693       lc = next;
694       continue;
695     }
696     // Write segment command with trailing sections.
697     typename T::command* cmd = reinterpret_cast<typename T::command*>(lc);
698     cmd->cmd = T::LC;
699     cmd->cmdsize = sizeof(typename T::command)
700                         + segInfo.sections.size() * sizeof(typename T::section);
701     uint8_t *next = lc + cmd->cmdsize;
702     setString16(seg.name, cmd->segname);
703     cmd->vmaddr   = seg.address;
704     cmd->vmsize   = seg.size;
705     cmd->fileoff  = segInfo.fileOffset;
706     cmd->filesize = segInfo.fileSize;
707     cmd->initprot = seg.init_access;
708     cmd->maxprot  = seg.max_access;
709     cmd->nsects   = segInfo.sections.size();
710     cmd->flags    = 0;
711     if (_swap)
712       swapStruct(*cmd);
713     typename T::section *sect = reinterpret_cast<typename T::section*>
714                                                (lc+sizeof(typename T::command));
715     for (const Section *section : segInfo.sections) {
716       setString16(section->sectionName, sect->sectname);
717       setString16(section->segmentName, sect->segname);
718       sect->addr      = section->address;
719       sect->size      = section->content.size();
720       if (isZeroFillSection(section->type))
721         sect->offset  = 0;
722       else
723         sect->offset  = section->address - seg.address + segInfo.fileOffset;
724       sect->align     = llvm::Log2_32(section->alignment);
725       sect->reloff    = 0;
726       sect->nreloc    = 0;
727       sect->flags     = section->type | section->attributes;
728       sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex);
729       sect->reserved2 = indirectSymbolElementSize(*section);
730       if (_swap)
731         swapStruct(*sect);
732       ++sect;
733     }
734     lc = reinterpret_cast<uint8_t*>(next);
735   }
736   return llvm::Error::success();
737 }
738 
writeVersionMinLoadCommand(const NormalizedFile & _file,bool _swap,uint8_t * & lc)739 static void writeVersionMinLoadCommand(const NormalizedFile &_file,
740                                        bool _swap,
741                                        uint8_t *&lc) {
742   if (!_file.hasMinVersionLoadCommand)
743     return;
744   version_min_command *vm = reinterpret_cast<version_min_command*>(lc);
745   switch (_file.os) {
746     case MachOLinkingContext::OS::unknown:
747       vm->cmd     = _file.minOSVersionKind;
748       vm->cmdsize = sizeof(version_min_command);
749       vm->version = _file.minOSverson;
750       vm->sdk     = 0;
751       break;
752     case MachOLinkingContext::OS::macOSX:
753       vm->cmd     = LC_VERSION_MIN_MACOSX;
754       vm->cmdsize = sizeof(version_min_command);
755       vm->version = _file.minOSverson;
756       vm->sdk     = _file.sdkVersion;
757       break;
758     case MachOLinkingContext::OS::iOS:
759     case MachOLinkingContext::OS::iOS_simulator:
760       vm->cmd     = LC_VERSION_MIN_IPHONEOS;
761       vm->cmdsize = sizeof(version_min_command);
762       vm->version = _file.minOSverson;
763       vm->sdk     = _file.sdkVersion;
764       break;
765   }
766   if (_swap)
767     swapStruct(*vm);
768   lc += sizeof(version_min_command);
769 }
770 
writeLoadCommands()771 llvm::Error MachOFileLayout::writeLoadCommands() {
772   uint8_t *lc = &_buffer[_startOfLoadCommands];
773   if (_file.fileType == llvm::MachO::MH_OBJECT) {
774     // Object files have one unnamed segment which holds all sections.
775     if (_is64) {
776      if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc))
777        return ec;
778     } else {
779       if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc))
780         return ec;
781     }
782     // Add LC_SYMTAB with symbol table info
783     symtab_command* st = reinterpret_cast<symtab_command*>(lc);
784     st->cmd     = LC_SYMTAB;
785     st->cmdsize = sizeof(symtab_command);
786     st->symoff  = _startOfSymbols;
787     st->nsyms   = _file.stabsSymbols.size() + _file.localSymbols.size() +
788                   _file.globalSymbols.size() + _file.undefinedSymbols.size();
789     st->stroff  = _startOfSymbolStrings;
790     st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
791     if (_swap)
792       swapStruct(*st);
793     lc += sizeof(symtab_command);
794 
795     // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
796     // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS
797     writeVersionMinLoadCommand(_file, _swap, lc);
798 
799     // Add LC_FUNCTION_STARTS if needed.
800     if (_functionStartsSize != 0) {
801       linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
802       dl->cmd      = LC_FUNCTION_STARTS;
803       dl->cmdsize  = sizeof(linkedit_data_command);
804       dl->dataoff  = _startOfFunctionStarts;
805       dl->datasize = _functionStartsSize;
806       if (_swap)
807         swapStruct(*dl);
808       lc += sizeof(linkedit_data_command);
809     }
810 
811     // Add LC_DATA_IN_CODE if requested.
812     if (_file.generateDataInCodeLoadCommand) {
813       linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
814       dl->cmd      = LC_DATA_IN_CODE;
815       dl->cmdsize  = sizeof(linkedit_data_command);
816       dl->dataoff  = _startOfDataInCode;
817       dl->datasize = _dataInCodeSize;
818       if (_swap)
819         swapStruct(*dl);
820       lc += sizeof(linkedit_data_command);
821     }
822   } else {
823     // Final linked images have sections under segments.
824     if (_is64) {
825       if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc))
826         return ec;
827     } else {
828       if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc))
829         return ec;
830     }
831 
832     // Add LC_ID_DYLIB command for dynamic libraries.
833     if (_file.fileType == llvm::MachO::MH_DYLIB) {
834       dylib_command *dc = reinterpret_cast<dylib_command*>(lc);
835       StringRef path = _file.installName;
836       uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1);
837       dc->cmd                         = LC_ID_DYLIB;
838       dc->cmdsize                     = size;
839       dc->dylib.name                  = sizeof(dylib_command); // offset
840       // needs to be some constant value different than the one in LC_LOAD_DYLIB
841       dc->dylib.timestamp             = 1;
842       dc->dylib.current_version       = _file.currentVersion;
843       dc->dylib.compatibility_version = _file.compatVersion;
844       if (_swap)
845         swapStruct(*dc);
846       memcpy(lc + sizeof(dylib_command), path.begin(), path.size());
847       lc[sizeof(dylib_command) + path.size()] = '\0';
848       lc += size;
849     }
850 
851     // Add LC_DYLD_INFO_ONLY.
852     dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc);
853     di->cmd            = LC_DYLD_INFO_ONLY;
854     di->cmdsize        = sizeof(dyld_info_command);
855     di->rebase_off     = _rebaseInfo.size() ? _startOfRebaseInfo : 0;
856     di->rebase_size    = _rebaseInfo.size();
857     di->bind_off       = _bindingInfo.size() ? _startOfBindingInfo : 0;
858     di->bind_size      = _bindingInfo.size();
859     di->weak_bind_off  = 0;
860     di->weak_bind_size = 0;
861     di->lazy_bind_off  = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0;
862     di->lazy_bind_size = _lazyBindingInfo.size();
863     di->export_off     = _exportTrie.size() ? _startOfExportTrie : 0;
864     di->export_size    = _exportTrie.size();
865     if (_swap)
866       swapStruct(*di);
867     lc += sizeof(dyld_info_command);
868 
869     // Add LC_SYMTAB with symbol table info.
870     symtab_command* st = reinterpret_cast<symtab_command*>(lc);
871     st->cmd     = LC_SYMTAB;
872     st->cmdsize = sizeof(symtab_command);
873     st->symoff  = _startOfSymbols;
874     st->nsyms   = _file.stabsSymbols.size() + _file.localSymbols.size() +
875                   _file.globalSymbols.size() + _file.undefinedSymbols.size();
876     st->stroff  = _startOfSymbolStrings;
877     st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
878     if (_swap)
879       swapStruct(*st);
880     lc += sizeof(symtab_command);
881 
882     // Add LC_DYSYMTAB
883     if (_file.fileType != llvm::MachO::MH_PRELOAD) {
884       dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc);
885       dst->cmd            = LC_DYSYMTAB;
886       dst->cmdsize        = sizeof(dysymtab_command);
887       dst->ilocalsym      = _symbolTableLocalsStartIndex;
888       dst->nlocalsym      = _file.stabsSymbols.size() +
889                             _file.localSymbols.size();
890       dst->iextdefsym     = _symbolTableGlobalsStartIndex;
891       dst->nextdefsym     = _file.globalSymbols.size();
892       dst->iundefsym      = _symbolTableUndefinesStartIndex;
893       dst->nundefsym      = _file.undefinedSymbols.size();
894       dst->tocoff         = 0;
895       dst->ntoc           = 0;
896       dst->modtaboff      = 0;
897       dst->nmodtab        = 0;
898       dst->extrefsymoff   = 0;
899       dst->nextrefsyms    = 0;
900       dst->indirectsymoff = _startOfIndirectSymbols;
901       dst->nindirectsyms  = _indirectSymbolTableCount;
902       dst->extreloff      = 0;
903       dst->nextrel        = 0;
904       dst->locreloff      = 0;
905       dst->nlocrel        = 0;
906       if (_swap)
907         swapStruct(*dst);
908       lc += sizeof(dysymtab_command);
909     }
910 
911     // If main executable, add LC_LOAD_DYLINKER
912     if (_file.fileType == llvm::MachO::MH_EXECUTE) {
913       // Build LC_LOAD_DYLINKER load command.
914       uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1);
915       dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc);
916       dl->cmd              = LC_LOAD_DYLINKER;
917       dl->cmdsize          = size;
918       dl->name             = sizeof(dylinker_command); // offset
919       if (_swap)
920         swapStruct(*dl);
921       memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size());
922       lc[sizeof(dylinker_command)+dyldPath().size()] = '\0';
923       lc += size;
924     }
925 
926     // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS,
927     // LC_VERSION_MIN_TVOS
928     writeVersionMinLoadCommand(_file, _swap, lc);
929 
930     // Add LC_SOURCE_VERSION
931     {
932       // Note, using a temporary here to appease UB as we may not be aligned
933       // enough for a struct containing a uint64_t when emitting a 32-bit binary
934       source_version_command sv;
935       sv.cmd       = LC_SOURCE_VERSION;
936       sv.cmdsize   = sizeof(source_version_command);
937       sv.version   = _file.sourceVersion;
938       if (_swap)
939         swapStruct(sv);
940       memcpy(lc, &sv, sizeof(source_version_command));
941       lc += sizeof(source_version_command);
942     }
943 
944     // If main executable, add LC_MAIN.
945     if (_file.fileType == llvm::MachO::MH_EXECUTE) {
946       // Build LC_MAIN load command.
947       // Note, using a temporary here to appease UB as we may not be aligned
948       // enough for a struct containing a uint64_t when emitting a 32-bit binary
949       entry_point_command ep;
950       ep.cmd       = LC_MAIN;
951       ep.cmdsize   = sizeof(entry_point_command);
952       ep.entryoff  = _file.entryAddress - _seg1addr;
953       ep.stacksize = _file.stackSize;
954       if (_swap)
955         swapStruct(ep);
956       memcpy(lc, &ep, sizeof(entry_point_command));
957       lc += sizeof(entry_point_command);
958     }
959 
960     // Add LC_LOAD_DYLIB commands
961     for (const DependentDylib &dep : _file.dependentDylibs) {
962       dylib_command* dc = reinterpret_cast<dylib_command*>(lc);
963       uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1);
964       dc->cmd                         = dep.kind;
965       dc->cmdsize                     = size;
966       dc->dylib.name                  = sizeof(dylib_command); // offset
967       // needs to be some constant value different than the one in LC_ID_DYLIB
968       dc->dylib.timestamp             = 2;
969       dc->dylib.current_version       = dep.currentVersion;
970       dc->dylib.compatibility_version = dep.compatVersion;
971       if (_swap)
972         swapStruct(*dc);
973       memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size());
974       lc[sizeof(dylib_command)+dep.path.size()] = '\0';
975       lc += size;
976     }
977 
978     // Add LC_RPATH
979     for (const StringRef &path : _file.rpaths) {
980       rpath_command *rpc = reinterpret_cast<rpath_command *>(lc);
981       uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1);
982       rpc->cmd                         = LC_RPATH;
983       rpc->cmdsize                     = size;
984       rpc->path                        = sizeof(rpath_command); // offset
985       if (_swap)
986         swapStruct(*rpc);
987       memcpy(lc+sizeof(rpath_command), path.begin(), path.size());
988       lc[sizeof(rpath_command)+path.size()] = '\0';
989       lc += size;
990     }
991 
992     // Add LC_FUNCTION_STARTS if needed.
993     if (_functionStartsSize != 0) {
994       linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
995       dl->cmd      = LC_FUNCTION_STARTS;
996       dl->cmdsize  = sizeof(linkedit_data_command);
997       dl->dataoff  = _startOfFunctionStarts;
998       dl->datasize = _functionStartsSize;
999       if (_swap)
1000         swapStruct(*dl);
1001       lc += sizeof(linkedit_data_command);
1002     }
1003 
1004     // Add LC_DATA_IN_CODE if requested.
1005     if (_file.generateDataInCodeLoadCommand) {
1006       linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
1007       dl->cmd      = LC_DATA_IN_CODE;
1008       dl->cmdsize  = sizeof(linkedit_data_command);
1009       dl->dataoff  = _startOfDataInCode;
1010       dl->datasize = _dataInCodeSize;
1011       if (_swap)
1012         swapStruct(*dl);
1013       lc += sizeof(linkedit_data_command);
1014     }
1015   }
1016   assert(lc == &_buffer[_endOfLoadCommands]);
1017   return llvm::Error::success();
1018 }
1019 
writeSectionContent()1020 void MachOFileLayout::writeSectionContent() {
1021   for (const Section &s : _file.sections) {
1022     // Copy all section content to output buffer.
1023     if (isZeroFillSection(s.type))
1024       continue;
1025     if (s.content.empty())
1026       continue;
1027     uint32_t offset = _sectInfo[&s].fileOffset;
1028     assert(offset >= _endOfLoadCommands);
1029     uint8_t *p = &_buffer[offset];
1030     memcpy(p, &s.content[0], s.content.size());
1031     p += s.content.size();
1032   }
1033 }
1034 
writeRelocations()1035 void MachOFileLayout::writeRelocations() {
1036   uint32_t relOffset = _startOfRelocations;
1037   for (Section sect : _file.sections) {
1038     for (Relocation r : sect.relocations) {
1039       any_relocation_info* rb = reinterpret_cast<any_relocation_info*>(
1040                                                            &_buffer[relOffset]);
1041       *rb = packRelocation(r, _swap, _bigEndianArch);
1042       relOffset += sizeof(any_relocation_info);
1043     }
1044   }
1045 }
1046 
appendSymbols(const std::vector<Symbol> & symbols,uint32_t & symOffset,uint32_t & strOffset)1047 void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols,
1048                                    uint32_t &symOffset, uint32_t &strOffset) {
1049   for (const Symbol &sym : symbols) {
1050     if (_is64) {
1051       nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]);
1052       nb->n_strx = strOffset - _startOfSymbolStrings;
1053       nb->n_type = sym.type | sym.scope;
1054       nb->n_sect = sym.sect;
1055       nb->n_desc = sym.desc;
1056       nb->n_value = sym.value;
1057       if (_swap)
1058         swapStruct(*nb);
1059       symOffset += sizeof(nlist_64);
1060     } else {
1061       nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]);
1062       nb->n_strx = strOffset - _startOfSymbolStrings;
1063       nb->n_type = sym.type | sym.scope;
1064       nb->n_sect = sym.sect;
1065       nb->n_desc = sym.desc;
1066       nb->n_value = sym.value;
1067       if (_swap)
1068         swapStruct(*nb);
1069       symOffset += sizeof(nlist);
1070     }
1071     memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size());
1072     strOffset += sym.name.size();
1073     _buffer[strOffset++] ='\0'; // Strings in table have nul terminator.
1074   }
1075 }
1076 
writeFunctionStartsInfo()1077 void MachOFileLayout::writeFunctionStartsInfo() {
1078   if (!_functionStartsSize)
1079     return;
1080   memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(),
1081          _functionStartsSize);
1082 }
1083 
writeDataInCodeInfo()1084 void MachOFileLayout::writeDataInCodeInfo() {
1085   uint32_t offset = _startOfDataInCode;
1086   for (const DataInCode &entry : _file.dataInCode) {
1087     data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>(
1088                                                              &_buffer[offset]);
1089     dst->offset = entry.offset;
1090     dst->length = entry.length;
1091     dst->kind   = entry.kind;
1092     if (_swap)
1093       swapStruct(*dst);
1094     offset += sizeof(data_in_code_entry);
1095   }
1096 }
1097 
writeSymbolTable()1098 void MachOFileLayout::writeSymbolTable() {
1099   // Write symbol table and symbol strings in parallel.
1100   uint32_t symOffset = _startOfSymbols;
1101   uint32_t strOffset = _startOfSymbolStrings;
1102   // Reserve n_strx offset of zero to mean no name.
1103   _buffer[strOffset++] = ' ';
1104   _buffer[strOffset++] = '\0';
1105   appendSymbols(_file.stabsSymbols, symOffset, strOffset);
1106   appendSymbols(_file.localSymbols, symOffset, strOffset);
1107   appendSymbols(_file.globalSymbols, symOffset, strOffset);
1108   appendSymbols(_file.undefinedSymbols, symOffset, strOffset);
1109   // Write indirect symbol table array.
1110   uint32_t *indirects = reinterpret_cast<uint32_t*>
1111                                             (&_buffer[_startOfIndirectSymbols]);
1112   if (_file.fileType == llvm::MachO::MH_OBJECT) {
1113     // Object files have sections in same order as input normalized file.
1114     for (const Section &section : _file.sections) {
1115       for (uint32_t index : section.indirectSymbols) {
1116         if (_swap)
1117           *indirects++ = llvm::sys::getSwappedBytes(index);
1118         else
1119           *indirects++ = index;
1120       }
1121     }
1122   } else {
1123     // Final linked images must sort sections from normalized file.
1124     for (const Segment &seg : _file.segments) {
1125       SegExtraInfo &segInfo = _segInfo[&seg];
1126       for (const Section *section : segInfo.sections) {
1127         for (uint32_t index : section->indirectSymbols) {
1128           if (_swap)
1129             *indirects++ = llvm::sys::getSwappedBytes(index);
1130           else
1131             *indirects++ = index;
1132         }
1133       }
1134     }
1135   }
1136 }
1137 
writeRebaseInfo()1138 void MachOFileLayout::writeRebaseInfo() {
1139   memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size());
1140 }
1141 
writeBindingInfo()1142 void MachOFileLayout::writeBindingInfo() {
1143   memcpy(&_buffer[_startOfBindingInfo],
1144                                     _bindingInfo.bytes(), _bindingInfo.size());
1145 }
1146 
writeLazyBindingInfo()1147 void MachOFileLayout::writeLazyBindingInfo() {
1148   memcpy(&_buffer[_startOfLazyBindingInfo],
1149                             _lazyBindingInfo.bytes(), _lazyBindingInfo.size());
1150 }
1151 
writeExportInfo()1152 void MachOFileLayout::writeExportInfo() {
1153   memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size());
1154 }
1155 
/// Prepares the link-edit data of the file before it is laid out.
///
/// Runs the builders for the rebase, bind, lazy-bind and export-trie byte
/// streams, then computes the symbol table, function-starts and data-in-code
/// sizes.
void MachOFileLayout::buildLinkEditInfo() {
  // Encode the dyld info streams.
  buildRebaseInfo();
  buildBindInfo();
  buildLazyBindInfo();
  buildExportTrie();
  // Compute the sizes of the remaining link-edit tables.
  computeSymbolTableSizes();
  computeFunctionStartsSize();
  computeDataInCodeSize();
}
1165 
/// Currently a no-op: the body performs no work.
void MachOFileLayout::buildSectionRelocations() {

}
1169 
buildRebaseInfo()1170 void MachOFileLayout::buildRebaseInfo() {
1171   // TODO: compress rebasing info.
1172   for (const RebaseLocation& entry : _file.rebasingInfo) {
1173     _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind);
1174     _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
1175                             | entry.segIndex);
1176     _rebaseInfo.append_uleb128(entry.segOffset);
1177     _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1);
1178   }
1179   _rebaseInfo.append_byte(REBASE_OPCODE_DONE);
1180   _rebaseInfo.align(_is64 ? 8 : 4);
1181 }
1182 
buildBindInfo()1183 void MachOFileLayout::buildBindInfo() {
1184   // TODO: compress bind info.
1185   uint64_t lastAddend = 0;
1186   int lastOrdinal = 0x80000000;
1187   StringRef lastSymbolName;
1188   BindType lastType = (BindType)0;
1189   Hex32 lastSegOffset = ~0U;
1190   uint8_t lastSegIndex = (uint8_t)~0U;
1191   for (const BindLocation& entry : _file.bindingInfo) {
1192     if (entry.ordinal != lastOrdinal) {
1193       if (entry.ordinal <= 0)
1194         _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
1195                                  (entry.ordinal & BIND_IMMEDIATE_MASK));
1196       else if (entry.ordinal <= BIND_IMMEDIATE_MASK)
1197         _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
1198                                  entry.ordinal);
1199       else {
1200         _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
1201         _bindingInfo.append_uleb128(entry.ordinal);
1202       }
1203       lastOrdinal = entry.ordinal;
1204     }
1205 
1206     if (lastSymbolName != entry.symbolName) {
1207       _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
1208       _bindingInfo.append_string(entry.symbolName);
1209       lastSymbolName = entry.symbolName;
1210     }
1211 
1212     if (lastType != entry.kind) {
1213       _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind);
1214       lastType = entry.kind;
1215     }
1216 
1217     if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) {
1218       _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
1219                                | entry.segIndex);
1220       _bindingInfo.append_uleb128(entry.segOffset);
1221       lastSegIndex = entry.segIndex;
1222       lastSegOffset = entry.segOffset;
1223     }
1224     if (entry.addend != lastAddend) {
1225       _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB);
1226       _bindingInfo.append_sleb128(entry.addend);
1227       lastAddend = entry.addend;
1228     }
1229     _bindingInfo.append_byte(BIND_OPCODE_DO_BIND);
1230   }
1231   _bindingInfo.append_byte(BIND_OPCODE_DONE);
1232   _bindingInfo.align(_is64 ? 8 : 4);
1233 }
1234 
buildLazyBindInfo()1235 void MachOFileLayout::buildLazyBindInfo() {
1236   for (const BindLocation& entry : _file.lazyBindingInfo) {
1237     _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
1238                             | entry.segIndex);
1239     _lazyBindingInfo.append_uleb128(entry.segOffset);
1240     if (entry.ordinal <= 0)
1241       _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
1242                                    (entry.ordinal & BIND_IMMEDIATE_MASK));
1243     else if (entry.ordinal <= BIND_IMMEDIATE_MASK)
1244       _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
1245                                    entry.ordinal);
1246     else {
1247       _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
1248       _lazyBindingInfo.append_uleb128(entry.ordinal);
1249     }
1250     // FIXME: We need to | the opcode here with flags.
1251     _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
1252     _lazyBindingInfo.append_string(entry.symbolName);
1253     _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND);
1254     _lazyBindingInfo.append_byte(BIND_OPCODE_DONE);
1255   }
1256   _lazyBindingInfo.align(_is64 ? 8 : 4);
1257 }
1258 
/// Inserts the exported symbol \p entry into the trie below this node.
///
/// The part of the symbol name not already covered by this node's cumulative
/// string is matched against the outgoing edges:
///  - if an edge string is a prefix of it, insertion continues in that child;
///  - otherwise, if it shares a leading part with an edge, that edge is split
///    by a new intermediate node and insertion continues from the new node;
///  - otherwise a new child holding the export info is attached to this node.
///
/// \param entry     the export to insert.
/// \param allocator owns the nodes, edges and copied strings created here.
/// \param allNodes  receives every node created by this call.
void TrieNode::addSymbol(const Export& entry,
                         BumpPtrAllocator &allocator,
                         std::vector<TrieNode*> &allNodes) {
  // Remainder of the symbol name once this node's prefix is removed.
  StringRef partialStr = entry.name.drop_front(_cummulativeString.size());
  for (TrieEdge &edge : _children) {
    StringRef edgeStr = edge._subString;
    if (partialStr.startswith(edgeStr)) {
      // Already have matching edge, go down that path.
      edge._child->addSymbol(entry, allocator, allNodes);
      return;
    }
    // See if string has common prefix with existing edge.
    // Candidate lengths run from one less than the edge length down to 1.
    for (int n=edgeStr.size()-1; n > 0; --n) {
      if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) {
        // Splice in new node:  was A -> C,  now A -> B -> C
        // B's cumulative string is C's with the unshared edge tail removed.
        StringRef bNodeStr = edge._child->_cummulativeString;
        bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator);
        auto *bNode = new (allocator) TrieNode(bNodeStr);
        allNodes.push_back(bNode);
        TrieNode* cNode = edge._child;
        StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator);
        StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator);
        DEBUG_WITH_TYPE("trie-builder", llvm::dbgs()
                        << "splice in TrieNode('" << bNodeStr
                        << "') between edge '"
                        << abEdgeStr << "' and edge='"
                        << bcEdgeStr<< "'\n");
        // Reuse the existing edge as A -> B and add a fresh edge for B -> C.
        TrieEdge& abEdge = edge;
        abEdge._subString = abEdgeStr;
        abEdge._child = bNode;
        auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode);
        bNode->_children.insert(bNode->_children.end(), bcEdge);
        // Continue inserting the symbol from the new intermediate node.
        bNode->addSymbol(entry, allocator, allNodes);
        return;
      }
    }
  }
  // Re-exports and stub-and-resolver exports must carry a non-zero
  // otherOffset.
  if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
    assert(entry.otherOffset != 0);
  }
  if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
    assert(entry.otherOffset != 0);
  }
  // No commonality with any existing child, make a new edge.
  auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator));
  auto *newEdge = new (allocator) TrieEdge(partialStr, newNode);
  _children.insert(_children.end(), newEdge);
  DEBUG_WITH_TYPE("trie-builder", llvm::dbgs()
                   << "new TrieNode('" << entry.name << "') with edge '"
                   << partialStr << "' from node='"
                   << _cummulativeString << "'\n");
  // Record the export info on the new node.
  newNode->_address = entry.offset;
  newNode->_flags = entry.flags | entry.kind;
  newNode->_other = entry.otherOffset;
  // The imported name is only kept for re-exports that name one.
  if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty())
    newNode->_importedName = entry.otherName.copy(allocator);
  newNode->_hasExportInfo = true;
  allNodes.push_back(newNode);
}
1318 
addOrderedNodes(const Export & entry,std::vector<TrieNode * > & orderedNodes)1319 void TrieNode::addOrderedNodes(const Export& entry,
1320                                std::vector<TrieNode*> &orderedNodes) {
1321   if (!_ordered) {
1322     orderedNodes.push_back(this);
1323     _ordered = true;
1324   }
1325 
1326   StringRef partialStr = entry.name.drop_front(_cummulativeString.size());
1327   for (TrieEdge &edge : _children) {
1328     StringRef edgeStr = edge._subString;
1329     if (partialStr.startswith(edgeStr)) {
1330       // Already have matching edge, go down that path.
1331       edge._child->addOrderedNodes(entry, orderedNodes);
1332       return;
1333     }
1334   }
1335 }
1336 
updateOffset(uint32_t & offset)1337 bool TrieNode::updateOffset(uint32_t& offset) {
1338   uint32_t nodeSize = 1; // Length when no export info
1339   if (_hasExportInfo) {
1340     if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1341       nodeSize = llvm::getULEB128Size(_flags);
1342       nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal.
1343       nodeSize += _importedName.size();
1344       ++nodeSize; // Trailing zero in imported name.
1345     } else {
1346       nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address);
1347       if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
1348         nodeSize += llvm::getULEB128Size(_other);
1349     }
1350     // Overall node size so far is uleb128 of export info + actual export info.
1351     nodeSize += llvm::getULEB128Size(nodeSize);
1352   }
1353   // Compute size of all child edges.
1354   ++nodeSize; // Byte for number of children.
1355   for (TrieEdge &edge : _children) {
1356     nodeSize += edge._subString.size() + 1 // String length.
1357               + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len.
1358   }
1359   // On input, 'offset' is new prefered location for this node.
1360   bool result = (_trieOffset != offset);
1361   // Store new location in node object for use by parents.
1362   _trieOffset = offset;
1363   // Update offset for next iteration.
1364   offset += nodeSize;
1365   // Return true if _trieOffset was changed.
1366   return result;
1367 }
1368 
appendToByteBuffer(ByteBuffer & out)1369 void TrieNode::appendToByteBuffer(ByteBuffer &out) {
1370   if (_hasExportInfo) {
1371     if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1372       if (!_importedName.empty()) {
1373         // nodes with re-export info: size, flags, ordinal, import-name
1374         uint32_t nodeSize = llvm::getULEB128Size(_flags)
1375                           + llvm::getULEB128Size(_other)
1376                           + _importedName.size() + 1;
1377         assert(nodeSize < 256);
1378         out.append_byte(nodeSize);
1379         out.append_uleb128(_flags);
1380         out.append_uleb128(_other);
1381         out.append_string(_importedName);
1382       } else {
1383         // nodes without re-export info: size, flags, ordinal, empty-string
1384         uint32_t nodeSize = llvm::getULEB128Size(_flags)
1385                           + llvm::getULEB128Size(_other) + 1;
1386         assert(nodeSize < 256);
1387         out.append_byte(nodeSize);
1388         out.append_uleb128(_flags);
1389         out.append_uleb128(_other);
1390         out.append_byte(0);
1391       }
1392     } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
1393       // Nodes with export info: size, flags, address, other
1394       uint32_t nodeSize = llvm::getULEB128Size(_flags)
1395                         + llvm::getULEB128Size(_address)
1396                         + llvm::getULEB128Size(_other);
1397       assert(nodeSize < 256);
1398       out.append_byte(nodeSize);
1399       out.append_uleb128(_flags);
1400       out.append_uleb128(_address);
1401       out.append_uleb128(_other);
1402     } else {
1403       // Nodes with export info: size, flags, address
1404       uint32_t nodeSize = llvm::getULEB128Size(_flags)
1405                         + llvm::getULEB128Size(_address);
1406       assert(nodeSize < 256);
1407       out.append_byte(nodeSize);
1408       out.append_uleb128(_flags);
1409       out.append_uleb128(_address);
1410     }
1411   } else {
1412     // Node with no export info.
1413     uint32_t nodeSize = 0;
1414     out.append_byte(nodeSize);
1415   }
1416   // Add number of children.
1417   assert(_children.size() < 256);
1418   out.append_byte(_children.size());
1419   // Append each child edge substring and node offset.
1420   for (TrieEdge &edge : _children) {
1421     out.append_string(edge._subString);
1422     out.append_uleb128(edge._child->_trieOffset);
1423   }
1424 }
1425 
buildExportTrie()1426 void MachOFileLayout::buildExportTrie() {
1427   if (_file.exportInfo.empty())
1428     return;
1429 
1430   // For all temporary strings and objects used building trie.
1431   BumpPtrAllocator allocator;
1432 
1433   // Build trie of all exported symbols.
1434   auto *rootNode = new (allocator) TrieNode(StringRef());
1435   std::vector<TrieNode*> allNodes;
1436   allNodes.reserve(_file.exportInfo.size()*2);
1437   allNodes.push_back(rootNode);
1438   for (const Export& entry : _file.exportInfo) {
1439     rootNode->addSymbol(entry, allocator, allNodes);
1440   }
1441 
1442   std::vector<TrieNode*> orderedNodes;
1443   orderedNodes.reserve(allNodes.size());
1444 
1445   for (const Export& entry : _file.exportInfo)
1446     rootNode->addOrderedNodes(entry, orderedNodes);
1447 
1448   // Assign each node in the vector an offset in the trie stream, iterating
1449   // until all uleb128 sizes have stabilized.
1450   bool more;
1451   do {
1452     uint32_t offset = 0;
1453     more = false;
1454     for (TrieNode* node : orderedNodes) {
1455       if (node->updateOffset(offset))
1456         more = true;
1457     }
1458   } while (more);
1459 
1460   // Serialize trie to ByteBuffer.
1461   for (TrieNode* node : orderedNodes) {
1462     node->appendToByteBuffer(_exportTrie);
1463   }
1464   _exportTrie.align(_is64 ? 8 : 4);
1465 }
1466 
computeSymbolTableSizes()1467 void MachOFileLayout::computeSymbolTableSizes() {
1468   // MachO symbol tables have three ranges: locals, globals, and undefines
1469   const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist));
1470   _symbolTableSize = nlistSize * (_file.stabsSymbols.size()
1471                                 + _file.localSymbols.size()
1472                                 + _file.globalSymbols.size()
1473                                 + _file.undefinedSymbols.size());
1474   // Always reserve 1-byte for the empty string and 1-byte for its terminator.
1475   _symbolStringPoolSize = 2;
1476   for (const Symbol &sym : _file.stabsSymbols) {
1477     _symbolStringPoolSize += (sym.name.size()+1);
1478   }
1479   for (const Symbol &sym : _file.localSymbols) {
1480     _symbolStringPoolSize += (sym.name.size()+1);
1481   }
1482   for (const Symbol &sym : _file.globalSymbols) {
1483     _symbolStringPoolSize += (sym.name.size()+1);
1484   }
1485   for (const Symbol &sym : _file.undefinedSymbols) {
1486     _symbolStringPoolSize += (sym.name.size()+1);
1487   }
1488   _symbolTableLocalsStartIndex = 0;
1489   _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() +
1490                                   _file.localSymbols.size();
1491   _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex
1492                                     + _file.globalSymbols.size();
1493 
1494   _indirectSymbolTableCount = 0;
1495   for (const Section &sect : _file.sections) {
1496     _indirectSymbolTableCount += sect.indirectSymbols.size();
1497   }
1498 }
1499 
computeFunctionStartsSize()1500 void MachOFileLayout::computeFunctionStartsSize() {
1501   _functionStartsSize = _file.functionStarts.size();
1502 }
1503 
computeDataInCodeSize()1504 void MachOFileLayout::computeDataInCodeSize() {
1505   _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry);
1506 }
1507 
writeLinkEditContent()1508 void MachOFileLayout::writeLinkEditContent() {
1509   if (_file.fileType == llvm::MachO::MH_OBJECT) {
1510     writeRelocations();
1511     writeFunctionStartsInfo();
1512     writeDataInCodeInfo();
1513     writeSymbolTable();
1514   } else {
1515     writeRebaseInfo();
1516     writeBindingInfo();
1517     writeLazyBindingInfo();
1518     // TODO: add weak binding info
1519     writeExportInfo();
1520     writeFunctionStartsInfo();
1521     writeDataInCodeInfo();
1522     writeSymbolTable();
1523   }
1524 }
1525 
writeBinary(StringRef path)1526 llvm::Error MachOFileLayout::writeBinary(StringRef path) {
1527   // Check for pending error from constructor.
1528   if (_ec)
1529     return llvm::errorCodeToError(_ec);
1530   // Create FileOutputBuffer with calculated size.
1531   unsigned flags = 0;
1532   if (_file.fileType != llvm::MachO::MH_OBJECT)
1533     flags = llvm::FileOutputBuffer::F_executable;
1534   Expected<std::unique_ptr<llvm::FileOutputBuffer>> fobOrErr =
1535       llvm::FileOutputBuffer::create(path, size(), flags);
1536   if (Error E = fobOrErr.takeError())
1537     return E;
1538   std::unique_ptr<llvm::FileOutputBuffer> &fob = *fobOrErr;
1539   // Write content.
1540   _buffer = fob->getBufferStart();
1541   writeMachHeader();
1542   if (auto ec = writeLoadCommands())
1543     return ec;
1544   writeSectionContent();
1545   writeLinkEditContent();
1546   if (Error E = fob->commit())
1547     return E;
1548 
1549   return llvm::Error::success();
1550 }
1551 
1552 /// Takes in-memory normalized view and writes a mach-o object file.
writeBinary(const NormalizedFile & file,StringRef path)1553 llvm::Error writeBinary(const NormalizedFile &file, StringRef path) {
1554   MachOFileLayout layout(file, false);
1555   return layout.writeBinary(path);
1556 }
1557 
1558 } // namespace normalized
1559 } // namespace mach_o
1560 } // namespace lld
1561