1 //===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
///
/// \file For mach-o object files, this implementation converts normalized
/// mach-o in memory to mach-o binary on disk.
///
///                 +---------------+
///                 | binary mach-o |
///                 +---------------+
///                        ^
///                        |
///                        |
///                  +------------+
///                  | normalized |
///                  +------------+
22
23 #include "MachONormalizedFile.h"
24 #include "MachONormalizedFileBinaryUtils.h"
25 #include "lld/Common/LLVM.h"
26 #include "lld/Core/Error.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/ADT/ilist.h"
31 #include "llvm/ADT/ilist_node.h"
32 #include "llvm/BinaryFormat/MachO.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/FileOutputBuffer.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/Host.h"
40 #include "llvm/Support/MemoryBuffer.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <functional>
43 #include <list>
44 #include <map>
45 #include <system_error>
46
47 using namespace llvm::MachO;
48
49 namespace lld {
50 namespace mach_o {
51 namespace normalized {
52
53 struct TrieNode; // Forward declaration.
54
55 struct TrieEdge : public llvm::ilist_node<TrieEdge> {
TrieEdgelld::mach_o::normalized::TrieEdge56 TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {}
57
58 StringRef _subString;
59 struct TrieNode *_child;
60 };
61
62 } // namespace normalized
63 } // namespace mach_o
64 } // namespace lld
65
66
67 namespace llvm {
68 using lld::mach_o::normalized::TrieEdge;
69 template <>
70 struct ilist_alloc_traits<TrieEdge> : ilist_noalloc_traits<TrieEdge> {};
71 } // namespace llvm
72
73
74 namespace lld {
75 namespace mach_o {
76 namespace normalized {
77
78 struct TrieNode {
79 typedef llvm::ilist<TrieEdge> TrieEdgeList;
80
TrieNodelld::mach_o::normalized::TrieNode81 TrieNode(StringRef s)
82 : _cummulativeString(s), _address(0), _flags(0), _other(0),
83 _trieOffset(0), _hasExportInfo(false) {}
84 ~TrieNode() = default;
85
86 void addSymbol(const Export &entry, BumpPtrAllocator &allocator,
87 std::vector<TrieNode *> &allNodes);
88
89 void addOrderedNodes(const Export &entry,
90 std::vector<TrieNode *> &allNodes);
91 bool updateOffset(uint32_t &offset);
92 void appendToByteBuffer(ByteBuffer &out);
93
94 private:
95 StringRef _cummulativeString;
96 TrieEdgeList _children;
97 uint64_t _address;
98 uint64_t _flags;
99 uint64_t _other;
100 StringRef _importedName;
101 uint32_t _trieOffset;
102 bool _hasExportInfo;
103 bool _ordered = false;
104 };
105
106 /// Utility class for writing a mach-o binary file given an in-memory
107 /// normalized file.
108 class MachOFileLayout {
109 public:
110 /// All layout computation is done in the constructor.
111 MachOFileLayout(const NormalizedFile &file, bool alwaysIncludeFunctionStarts);
112
113 /// Returns the final file size as computed in the constructor.
114 size_t size() const;
115
116 // Returns size of the mach_header and load commands.
117 size_t headerAndLoadCommandsSize() const;
118
119 /// Writes the normalized file as a binary mach-o file to the specified
120 /// path. This does not have a stream interface because the generated
121 /// file may need the 'x' bit set.
122 llvm::Error writeBinary(StringRef path);
123
124 private:
125 uint32_t loadCommandsSize(uint32_t &count,
126 bool alwaysIncludeFunctionStarts);
127 void buildFileOffsets();
128 void writeMachHeader();
129 llvm::Error writeLoadCommands();
130 void writeSectionContent();
131 void writeRelocations();
132 void writeSymbolTable();
133 void writeRebaseInfo();
134 void writeBindingInfo();
135 void writeLazyBindingInfo();
136 void writeExportInfo();
137 void writeFunctionStartsInfo();
138 void writeDataInCodeInfo();
139 void writeLinkEditContent();
140 void buildLinkEditInfo();
141 void buildRebaseInfo();
142 void buildBindInfo();
143 void buildLazyBindInfo();
144 void buildExportTrie();
145 void computeFunctionStartsSize();
146 void computeDataInCodeSize();
147 void computeSymbolTableSizes();
148 void buildSectionRelocations();
149 void appendSymbols(const std::vector<Symbol> &symbols,
150 uint32_t &symOffset, uint32_t &strOffset);
151 uint32_t indirectSymbolIndex(const Section §, uint32_t &index);
152 uint32_t indirectSymbolElementSize(const Section §);
153
154 // For use as template parameter to load command methods.
155 struct MachO64Trait {
156 typedef llvm::MachO::segment_command_64 command;
157 typedef llvm::MachO::section_64 section;
158 enum { LC = llvm::MachO::LC_SEGMENT_64 };
159 };
160
161 // For use as template parameter to load command methods.
162 struct MachO32Trait {
163 typedef llvm::MachO::segment_command command;
164 typedef llvm::MachO::section section;
165 enum { LC = llvm::MachO::LC_SEGMENT };
166 };
167
168 template <typename T>
169 llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc);
170 template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc);
171
172 uint32_t pointerAlign(uint32_t value);
173 static StringRef dyldPath();
174
175 struct SegExtraInfo {
176 uint32_t fileOffset;
177 uint32_t fileSize;
178 std::vector<const Section*> sections;
179 };
180 typedef std::map<const Segment*, SegExtraInfo> SegMap;
181 struct SectionExtraInfo {
182 uint32_t fileOffset;
183 };
184 typedef std::map<const Section*, SectionExtraInfo> SectionMap;
185
186 const NormalizedFile &_file;
187 std::error_code _ec;
188 uint8_t *_buffer;
189 const bool _is64;
190 const bool _swap;
191 const bool _bigEndianArch;
192 uint64_t _seg1addr;
193 uint32_t _startOfLoadCommands;
194 uint32_t _countOfLoadCommands;
195 uint32_t _endOfLoadCommands;
196 uint32_t _startOfRelocations;
197 uint32_t _startOfFunctionStarts;
198 uint32_t _startOfDataInCode;
199 uint32_t _startOfSymbols;
200 uint32_t _startOfIndirectSymbols;
201 uint32_t _startOfSymbolStrings;
202 uint32_t _endOfSymbolStrings;
203 uint32_t _symbolTableLocalsStartIndex;
204 uint32_t _symbolTableGlobalsStartIndex;
205 uint32_t _symbolTableUndefinesStartIndex;
206 uint32_t _symbolStringPoolSize;
207 uint32_t _symbolTableSize;
208 uint32_t _functionStartsSize;
209 uint32_t _dataInCodeSize;
210 uint32_t _indirectSymbolTableCount;
211 // Used in object file creation only
212 uint32_t _startOfSectionsContent;
213 uint32_t _endOfSectionsContent;
214 // Used in final linked image only
215 uint32_t _startOfLinkEdit;
216 uint32_t _startOfRebaseInfo;
217 uint32_t _endOfRebaseInfo;
218 uint32_t _startOfBindingInfo;
219 uint32_t _endOfBindingInfo;
220 uint32_t _startOfLazyBindingInfo;
221 uint32_t _endOfLazyBindingInfo;
222 uint32_t _startOfExportTrie;
223 uint32_t _endOfExportTrie;
224 uint32_t _endOfLinkEdit;
225 uint64_t _addressOfLinkEdit;
226 SegMap _segInfo;
227 SectionMap _sectInfo;
228 ByteBuffer _rebaseInfo;
229 ByteBuffer _bindingInfo;
230 ByteBuffer _lazyBindingInfo;
231 ByteBuffer _weakBindingInfo;
232 ByteBuffer _exportTrie;
233 };
234
headerAndLoadCommandsSize(const NormalizedFile & file,bool includeFunctionStarts)235 size_t headerAndLoadCommandsSize(const NormalizedFile &file,
236 bool includeFunctionStarts) {
237 MachOFileLayout layout(file, includeFunctionStarts);
238 return layout.headerAndLoadCommandsSize();
239 }
240
dyldPath()241 StringRef MachOFileLayout::dyldPath() {
242 return "/usr/lib/dyld";
243 }
244
pointerAlign(uint32_t value)245 uint32_t MachOFileLayout::pointerAlign(uint32_t value) {
246 return llvm::alignTo(value, _is64 ? 8 : 4);
247 }
248
249
headerAndLoadCommandsSize() const250 size_t MachOFileLayout::headerAndLoadCommandsSize() const {
251 return _endOfLoadCommands;
252 }
253
MachOFileLayout(const NormalizedFile & file,bool alwaysIncludeFunctionStarts)254 MachOFileLayout::MachOFileLayout(const NormalizedFile &file,
255 bool alwaysIncludeFunctionStarts)
256 : _file(file),
257 _is64(MachOLinkingContext::is64Bit(file.arch)),
258 _swap(!MachOLinkingContext::isHostEndian(file.arch)),
259 _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)),
260 _seg1addr(INT64_MAX) {
261 _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header);
262 const size_t segCommandBaseSize =
263 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command));
264 const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section));
265 if (file.fileType == llvm::MachO::MH_OBJECT) {
266 // object files have just one segment load command containing all sections
267 _endOfLoadCommands = _startOfLoadCommands
268 + segCommandBaseSize
269 + file.sections.size() * sectsSize
270 + sizeof(symtab_command);
271 _countOfLoadCommands = 2;
272 if (file.hasMinVersionLoadCommand) {
273 _endOfLoadCommands += sizeof(version_min_command);
274 _countOfLoadCommands++;
275 }
276 if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) {
277 _endOfLoadCommands += sizeof(linkedit_data_command);
278 _countOfLoadCommands++;
279 }
280 if (_file.generateDataInCodeLoadCommand) {
281 _endOfLoadCommands += sizeof(linkedit_data_command);
282 _countOfLoadCommands++;
283 }
284 // Assign file offsets to each section.
285 _startOfSectionsContent = _endOfLoadCommands;
286 unsigned relocCount = 0;
287 uint64_t offset = _startOfSectionsContent;
288 for (const Section § : file.sections) {
289 if (isZeroFillSection(sect.type))
290 _sectInfo[§].fileOffset = 0;
291 else {
292 offset = llvm::alignTo(offset, sect.alignment);
293 _sectInfo[§].fileOffset = offset;
294 offset += sect.content.size();
295 }
296 relocCount += sect.relocations.size();
297 }
298 _endOfSectionsContent = offset;
299
300 computeSymbolTableSizes();
301 computeFunctionStartsSize();
302 computeDataInCodeSize();
303
304 // Align start of relocations.
305 _startOfRelocations = pointerAlign(_endOfSectionsContent);
306 _startOfFunctionStarts = _startOfRelocations + relocCount * 8;
307 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
308 _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
309 // Add Indirect symbol table.
310 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
311 // Align start of symbol table and symbol strings.
312 _startOfSymbolStrings = _startOfIndirectSymbols
313 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
314 _endOfSymbolStrings = _startOfSymbolStrings
315 + pointerAlign(_symbolStringPoolSize);
316 _endOfLinkEdit = _endOfSymbolStrings;
317 DEBUG_WITH_TYPE("MachOFileLayout",
318 llvm::dbgs() << "MachOFileLayout()\n"
319 << " startOfLoadCommands=" << _startOfLoadCommands << "\n"
320 << " countOfLoadCommands=" << _countOfLoadCommands << "\n"
321 << " endOfLoadCommands=" << _endOfLoadCommands << "\n"
322 << " startOfRelocations=" << _startOfRelocations << "\n"
323 << " startOfSymbols=" << _startOfSymbols << "\n"
324 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
325 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
326 << " startOfSectionsContent=" << _startOfSectionsContent << "\n"
327 << " endOfSectionsContent=" << _endOfSectionsContent << "\n");
328 } else {
329 // Final linked images have one load command per segment.
330 _endOfLoadCommands = _startOfLoadCommands
331 + loadCommandsSize(_countOfLoadCommands,
332 alwaysIncludeFunctionStarts);
333
334 // Assign section file offsets.
335 buildFileOffsets();
336 buildLinkEditInfo();
337
338 // LINKEDIT of final linked images has in order:
339 // rebase info, binding info, lazy binding info, weak binding info,
340 // data-in-code, symbol table, indirect symbol table, symbol table strings.
341 _startOfRebaseInfo = _startOfLinkEdit;
342 _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size();
343 _startOfBindingInfo = _endOfRebaseInfo;
344 _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size();
345 _startOfLazyBindingInfo = _endOfBindingInfo;
346 _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size();
347 _startOfExportTrie = _endOfLazyBindingInfo;
348 _endOfExportTrie = _startOfExportTrie + _exportTrie.size();
349 _startOfFunctionStarts = _endOfExportTrie;
350 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
351 _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
352 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
353 _startOfSymbolStrings = _startOfIndirectSymbols
354 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
355 _endOfSymbolStrings = _startOfSymbolStrings
356 + pointerAlign(_symbolStringPoolSize);
357 _endOfLinkEdit = _endOfSymbolStrings;
358 DEBUG_WITH_TYPE("MachOFileLayout",
359 llvm::dbgs() << "MachOFileLayout()\n"
360 << " startOfLoadCommands=" << _startOfLoadCommands << "\n"
361 << " countOfLoadCommands=" << _countOfLoadCommands << "\n"
362 << " endOfLoadCommands=" << _endOfLoadCommands << "\n"
363 << " startOfLinkEdit=" << _startOfLinkEdit << "\n"
364 << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n"
365 << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n"
366 << " startOfBindingInfo=" << _startOfBindingInfo << "\n"
367 << " endOfBindingInfo=" << _endOfBindingInfo << "\n"
368 << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n"
369 << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n"
370 << " startOfExportTrie=" << _startOfExportTrie << "\n"
371 << " endOfExportTrie=" << _endOfExportTrie << "\n"
372 << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n"
373 << " startOfDataInCode=" << _startOfDataInCode << "\n"
374 << " startOfSymbols=" << _startOfSymbols << "\n"
375 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
376 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
377 << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n");
378 }
379 }
380
loadCommandsSize(uint32_t & count,bool alwaysIncludeFunctionStarts)381 uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count,
382 bool alwaysIncludeFunctionStarts) {
383 uint32_t size = 0;
384 count = 0;
385
386 const size_t segCommandSize =
387 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command));
388 const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section));
389
390 // Add LC_SEGMENT for each segment.
391 size += _file.segments.size() * segCommandSize;
392 count += _file.segments.size();
393 // Add section record for each section.
394 size += _file.sections.size() * sectionSize;
395
396 // If creating a dylib, add LC_ID_DYLIB.
397 if (_file.fileType == llvm::MachO::MH_DYLIB) {
398 size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1);
399 ++count;
400 }
401
402 // Add LC_DYLD_INFO
403 size += sizeof(dyld_info_command);
404 ++count;
405
406 // Add LC_SYMTAB
407 size += sizeof(symtab_command);
408 ++count;
409
410 // Add LC_DYSYMTAB
411 if (_file.fileType != llvm::MachO::MH_PRELOAD) {
412 size += sizeof(dysymtab_command);
413 ++count;
414 }
415
416 // If main executable add LC_LOAD_DYLINKER
417 if (_file.fileType == llvm::MachO::MH_EXECUTE) {
418 size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1);
419 ++count;
420 }
421
422 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS,
423 // LC_VERSION_MIN_TVOS
424 if (_file.hasMinVersionLoadCommand) {
425 size += sizeof(version_min_command);
426 ++count;
427 }
428
429 // Add LC_SOURCE_VERSION
430 size += sizeof(source_version_command);
431 ++count;
432
433 // If main executable add LC_MAIN
434 if (_file.fileType == llvm::MachO::MH_EXECUTE) {
435 size += sizeof(entry_point_command);
436 ++count;
437 }
438
439 // Add LC_LOAD_DYLIB for each dependent dylib.
440 for (const DependentDylib &dep : _file.dependentDylibs) {
441 size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1);
442 ++count;
443 }
444
445 // Add LC_RPATH
446 for (const StringRef &path : _file.rpaths) {
447 size += pointerAlign(sizeof(rpath_command) + path.size() + 1);
448 ++count;
449 }
450
451 // Add LC_FUNCTION_STARTS if needed
452 if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) {
453 size += sizeof(linkedit_data_command);
454 ++count;
455 }
456
457 // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries.
458 // FIXME: Zero length entries is only to match ld64. Should we change this?
459 if (_file.generateDataInCodeLoadCommand) {
460 size += sizeof(linkedit_data_command);
461 ++count;
462 }
463
464 return size;
465 }
466
overlaps(const Segment & s1,const Segment & s2)467 static bool overlaps(const Segment &s1, const Segment &s2) {
468 if (s2.address >= s1.address+s1.size)
469 return false;
470 if (s1.address >= s2.address+s2.size)
471 return false;
472 return true;
473 }
474
overlaps(const Section & s1,const Section & s2)475 static bool overlaps(const Section &s1, const Section &s2) {
476 if (s2.address >= s1.address+s1.content.size())
477 return false;
478 if (s1.address >= s2.address+s2.content.size())
479 return false;
480 return true;
481 }
482
buildFileOffsets()483 void MachOFileLayout::buildFileOffsets() {
484 // Verify no segments overlap
485 for (const Segment &sg1 : _file.segments) {
486 for (const Segment &sg2 : _file.segments) {
487 if (&sg1 == &sg2)
488 continue;
489 if (overlaps(sg1,sg2)) {
490 _ec = make_error_code(llvm::errc::executable_format_error);
491 return;
492 }
493 }
494 }
495
496 // Verify no sections overlap
497 for (const Section &s1 : _file.sections) {
498 for (const Section &s2 : _file.sections) {
499 if (&s1 == &s2)
500 continue;
501 if (overlaps(s1,s2)) {
502 _ec = make_error_code(llvm::errc::executable_format_error);
503 return;
504 }
505 }
506 }
507
508 // Build side table of extra info about segments and sections.
509 SegExtraInfo t;
510 t.fileOffset = 0;
511 for (const Segment &sg : _file.segments) {
512 _segInfo[&sg] = t;
513 }
514 SectionExtraInfo t2;
515 t2.fileOffset = 0;
516 // Assign sections to segments.
517 for (const Section &s : _file.sections) {
518 _sectInfo[&s] = t2;
519 bool foundSegment = false;
520 for (const Segment &sg : _file.segments) {
521 if (sg.name.equals(s.segmentName)) {
522 if ((s.address >= sg.address)
523 && (s.address+s.content.size() <= sg.address+sg.size)) {
524 _segInfo[&sg].sections.push_back(&s);
525 foundSegment = true;
526 break;
527 }
528 }
529 }
530 if (!foundSegment) {
531 _ec = make_error_code(llvm::errc::executable_format_error);
532 return;
533 }
534 }
535
536 // Assign file offsets.
537 uint32_t fileOffset = 0;
538 DEBUG_WITH_TYPE("MachOFileLayout",
539 llvm::dbgs() << "buildFileOffsets()\n");
540 for (const Segment &sg : _file.segments) {
541 _segInfo[&sg].fileOffset = fileOffset;
542 if ((_seg1addr == INT64_MAX) && sg.init_access)
543 _seg1addr = sg.address;
544 DEBUG_WITH_TYPE("MachOFileLayout",
545 llvm::dbgs() << " segment=" << sg.name
546 << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n");
547
548 uint32_t segFileSize = 0;
549 // A segment that is not zero-fill must use a least one page of disk space.
550 if (sg.init_access)
551 segFileSize = _file.pageSize;
552 for (const Section *s : _segInfo[&sg].sections) {
553 uint32_t sectOffset = s->address - sg.address;
554 uint32_t sectFileSize =
555 isZeroFillSection(s->type) ? 0 : s->content.size();
556 segFileSize = std::max(segFileSize, sectOffset + sectFileSize);
557
558 _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset;
559 DEBUG_WITH_TYPE("MachOFileLayout",
560 llvm::dbgs() << " section=" << s->sectionName
561 << ", fileOffset=" << fileOffset << "\n");
562 }
563
564 // round up all segments to page aligned, except __LINKEDIT
565 if (!sg.name.equals("__LINKEDIT")) {
566 _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize);
567 fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize);
568 }
569 _addressOfLinkEdit = sg.address + sg.size;
570 }
571 _startOfLinkEdit = fileOffset;
572 }
573
size() const574 size_t MachOFileLayout::size() const {
575 return _endOfSymbolStrings;
576 }
577
writeMachHeader()578 void MachOFileLayout::writeMachHeader() {
579 auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch);
580 // dynamic x86 executables on newer OS version should also set the
581 // CPU_SUBTYPE_LIB64 mask in the CPU subtype.
582 // FIXME: Check that this is a dynamic executable, not a static one.
583 if (_file.fileType == llvm::MachO::MH_EXECUTE &&
584 cpusubtype == CPU_SUBTYPE_X86_64_ALL &&
585 _file.os == MachOLinkingContext::OS::macOSX) {
586 uint32_t version;
587 bool failed = MachOLinkingContext::parsePackedVersion("10.5", version);
588 if (!failed && _file.minOSverson >= version)
589 cpusubtype |= CPU_SUBTYPE_LIB64;
590 }
591
592 mach_header *mh = reinterpret_cast<mach_header*>(_buffer);
593 mh->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC;
594 mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch);
595 mh->cpusubtype = cpusubtype;
596 mh->filetype = _file.fileType;
597 mh->ncmds = _countOfLoadCommands;
598 mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands;
599 mh->flags = _file.flags;
600 if (_swap)
601 swapStruct(*mh);
602 }
603
indirectSymbolIndex(const Section & sect,uint32_t & index)604 uint32_t MachOFileLayout::indirectSymbolIndex(const Section §,
605 uint32_t &index) {
606 if (sect.indirectSymbols.empty())
607 return 0;
608 uint32_t result = index;
609 index += sect.indirectSymbols.size();
610 return result;
611 }
612
indirectSymbolElementSize(const Section & sect)613 uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) {
614 if (sect.indirectSymbols.empty())
615 return 0;
616 if (sect.type != S_SYMBOL_STUBS)
617 return 0;
618 return sect.content.size() / sect.indirectSymbols.size();
619 }
620
621 template <typename T>
writeSingleSegmentLoadCommand(uint8_t * & lc)622 llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) {
623 typename T::command* seg = reinterpret_cast<typename T::command*>(lc);
624 seg->cmd = T::LC;
625 seg->cmdsize = sizeof(typename T::command)
626 + _file.sections.size() * sizeof(typename T::section);
627 uint8_t *next = lc + seg->cmdsize;
628 memset(seg->segname, 0, 16);
629 seg->flags = 0;
630 seg->vmaddr = 0;
631 seg->fileoff = _endOfLoadCommands;
632 seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
633 seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
634 seg->nsects = _file.sections.size();
635 if (seg->nsects) {
636 seg->vmsize = _file.sections.back().address
637 + _file.sections.back().content.size();
638 seg->filesize = _sectInfo[&_file.sections.back()].fileOffset +
639 _file.sections.back().content.size() -
640 _sectInfo[&_file.sections.front()].fileOffset;
641 }
642 if (_swap)
643 swapStruct(*seg);
644 typename T::section *sout = reinterpret_cast<typename T::section*>
645 (lc+sizeof(typename T::command));
646 uint32_t relOffset = _startOfRelocations;
647 uint32_t indirectSymRunningIndex = 0;
648 for (const Section &sin : _file.sections) {
649 setString16(sin.sectionName, sout->sectname);
650 setString16(sin.segmentName, sout->segname);
651 sout->addr = sin.address;
652 sout->size = sin.content.size();
653 sout->offset = _sectInfo[&sin].fileOffset;
654 sout->align = llvm::Log2_32(sin.alignment);
655 sout->reloff = sin.relocations.empty() ? 0 : relOffset;
656 sout->nreloc = sin.relocations.size();
657 sout->flags = sin.type | sin.attributes;
658 sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex);
659 sout->reserved2 = indirectSymbolElementSize(sin);
660 relOffset += sin.relocations.size() * sizeof(any_relocation_info);
661 if (_swap)
662 swapStruct(*sout);
663 ++sout;
664 }
665 lc = next;
666 return llvm::Error::success();
667 }
668
669 template <typename T>
writeSegmentLoadCommands(uint8_t * & lc)670 llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) {
671 uint32_t indirectSymRunningIndex = 0;
672 for (const Segment &seg : _file.segments) {
673 // Link edit has no sections and a custom range of address, so handle it
674 // specially.
675 SegExtraInfo &segInfo = _segInfo[&seg];
676 if (seg.name.equals("__LINKEDIT")) {
677 size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit;
678 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc);
679 cmd->cmd = T::LC;
680 cmd->cmdsize = sizeof(typename T::command);
681 uint8_t *next = lc + cmd->cmdsize;
682 setString16("__LINKEDIT", cmd->segname);
683 cmd->vmaddr = _addressOfLinkEdit;
684 cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize);
685 cmd->fileoff = _startOfLinkEdit;
686 cmd->filesize = linkeditSize;
687 cmd->initprot = seg.init_access;
688 cmd->maxprot = seg.max_access;
689 cmd->nsects = 0;
690 cmd->flags = 0;
691 if (_swap)
692 swapStruct(*cmd);
693 lc = next;
694 continue;
695 }
696 // Write segment command with trailing sections.
697 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc);
698 cmd->cmd = T::LC;
699 cmd->cmdsize = sizeof(typename T::command)
700 + segInfo.sections.size() * sizeof(typename T::section);
701 uint8_t *next = lc + cmd->cmdsize;
702 setString16(seg.name, cmd->segname);
703 cmd->vmaddr = seg.address;
704 cmd->vmsize = seg.size;
705 cmd->fileoff = segInfo.fileOffset;
706 cmd->filesize = segInfo.fileSize;
707 cmd->initprot = seg.init_access;
708 cmd->maxprot = seg.max_access;
709 cmd->nsects = segInfo.sections.size();
710 cmd->flags = 0;
711 if (_swap)
712 swapStruct(*cmd);
713 typename T::section *sect = reinterpret_cast<typename T::section*>
714 (lc+sizeof(typename T::command));
715 for (const Section *section : segInfo.sections) {
716 setString16(section->sectionName, sect->sectname);
717 setString16(section->segmentName, sect->segname);
718 sect->addr = section->address;
719 sect->size = section->content.size();
720 if (isZeroFillSection(section->type))
721 sect->offset = 0;
722 else
723 sect->offset = section->address - seg.address + segInfo.fileOffset;
724 sect->align = llvm::Log2_32(section->alignment);
725 sect->reloff = 0;
726 sect->nreloc = 0;
727 sect->flags = section->type | section->attributes;
728 sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex);
729 sect->reserved2 = indirectSymbolElementSize(*section);
730 if (_swap)
731 swapStruct(*sect);
732 ++sect;
733 }
734 lc = reinterpret_cast<uint8_t*>(next);
735 }
736 return llvm::Error::success();
737 }
738
writeVersionMinLoadCommand(const NormalizedFile & _file,bool _swap,uint8_t * & lc)739 static void writeVersionMinLoadCommand(const NormalizedFile &_file,
740 bool _swap,
741 uint8_t *&lc) {
742 if (!_file.hasMinVersionLoadCommand)
743 return;
744 version_min_command *vm = reinterpret_cast<version_min_command*>(lc);
745 switch (_file.os) {
746 case MachOLinkingContext::OS::unknown:
747 vm->cmd = _file.minOSVersionKind;
748 vm->cmdsize = sizeof(version_min_command);
749 vm->version = _file.minOSverson;
750 vm->sdk = 0;
751 break;
752 case MachOLinkingContext::OS::macOSX:
753 vm->cmd = LC_VERSION_MIN_MACOSX;
754 vm->cmdsize = sizeof(version_min_command);
755 vm->version = _file.minOSverson;
756 vm->sdk = _file.sdkVersion;
757 break;
758 case MachOLinkingContext::OS::iOS:
759 case MachOLinkingContext::OS::iOS_simulator:
760 vm->cmd = LC_VERSION_MIN_IPHONEOS;
761 vm->cmdsize = sizeof(version_min_command);
762 vm->version = _file.minOSverson;
763 vm->sdk = _file.sdkVersion;
764 break;
765 }
766 if (_swap)
767 swapStruct(*vm);
768 lc += sizeof(version_min_command);
769 }
770
writeLoadCommands()771 llvm::Error MachOFileLayout::writeLoadCommands() {
772 uint8_t *lc = &_buffer[_startOfLoadCommands];
773 if (_file.fileType == llvm::MachO::MH_OBJECT) {
774 // Object files have one unnamed segment which holds all sections.
775 if (_is64) {
776 if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc))
777 return ec;
778 } else {
779 if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc))
780 return ec;
781 }
782 // Add LC_SYMTAB with symbol table info
783 symtab_command* st = reinterpret_cast<symtab_command*>(lc);
784 st->cmd = LC_SYMTAB;
785 st->cmdsize = sizeof(symtab_command);
786 st->symoff = _startOfSymbols;
787 st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() +
788 _file.globalSymbols.size() + _file.undefinedSymbols.size();
789 st->stroff = _startOfSymbolStrings;
790 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
791 if (_swap)
792 swapStruct(*st);
793 lc += sizeof(symtab_command);
794
795 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
796 // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS
797 writeVersionMinLoadCommand(_file, _swap, lc);
798
799 // Add LC_FUNCTION_STARTS if needed.
800 if (_functionStartsSize != 0) {
801 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
802 dl->cmd = LC_FUNCTION_STARTS;
803 dl->cmdsize = sizeof(linkedit_data_command);
804 dl->dataoff = _startOfFunctionStarts;
805 dl->datasize = _functionStartsSize;
806 if (_swap)
807 swapStruct(*dl);
808 lc += sizeof(linkedit_data_command);
809 }
810
811 // Add LC_DATA_IN_CODE if requested.
812 if (_file.generateDataInCodeLoadCommand) {
813 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
814 dl->cmd = LC_DATA_IN_CODE;
815 dl->cmdsize = sizeof(linkedit_data_command);
816 dl->dataoff = _startOfDataInCode;
817 dl->datasize = _dataInCodeSize;
818 if (_swap)
819 swapStruct(*dl);
820 lc += sizeof(linkedit_data_command);
821 }
822 } else {
823 // Final linked images have sections under segments.
824 if (_is64) {
825 if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc))
826 return ec;
827 } else {
828 if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc))
829 return ec;
830 }
831
832 // Add LC_ID_DYLIB command for dynamic libraries.
833 if (_file.fileType == llvm::MachO::MH_DYLIB) {
834 dylib_command *dc = reinterpret_cast<dylib_command*>(lc);
835 StringRef path = _file.installName;
836 uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1);
837 dc->cmd = LC_ID_DYLIB;
838 dc->cmdsize = size;
839 dc->dylib.name = sizeof(dylib_command); // offset
840 // needs to be some constant value different than the one in LC_LOAD_DYLIB
841 dc->dylib.timestamp = 1;
842 dc->dylib.current_version = _file.currentVersion;
843 dc->dylib.compatibility_version = _file.compatVersion;
844 if (_swap)
845 swapStruct(*dc);
846 memcpy(lc + sizeof(dylib_command), path.begin(), path.size());
847 lc[sizeof(dylib_command) + path.size()] = '\0';
848 lc += size;
849 }
850
851 // Add LC_DYLD_INFO_ONLY.
852 dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc);
853 di->cmd = LC_DYLD_INFO_ONLY;
854 di->cmdsize = sizeof(dyld_info_command);
855 di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0;
856 di->rebase_size = _rebaseInfo.size();
857 di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0;
858 di->bind_size = _bindingInfo.size();
859 di->weak_bind_off = 0;
860 di->weak_bind_size = 0;
861 di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0;
862 di->lazy_bind_size = _lazyBindingInfo.size();
863 di->export_off = _exportTrie.size() ? _startOfExportTrie : 0;
864 di->export_size = _exportTrie.size();
865 if (_swap)
866 swapStruct(*di);
867 lc += sizeof(dyld_info_command);
868
869 // Add LC_SYMTAB with symbol table info.
870 symtab_command* st = reinterpret_cast<symtab_command*>(lc);
871 st->cmd = LC_SYMTAB;
872 st->cmdsize = sizeof(symtab_command);
873 st->symoff = _startOfSymbols;
874 st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() +
875 _file.globalSymbols.size() + _file.undefinedSymbols.size();
876 st->stroff = _startOfSymbolStrings;
877 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
878 if (_swap)
879 swapStruct(*st);
880 lc += sizeof(symtab_command);
881
882 // Add LC_DYSYMTAB
883 if (_file.fileType != llvm::MachO::MH_PRELOAD) {
884 dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc);
885 dst->cmd = LC_DYSYMTAB;
886 dst->cmdsize = sizeof(dysymtab_command);
887 dst->ilocalsym = _symbolTableLocalsStartIndex;
888 dst->nlocalsym = _file.stabsSymbols.size() +
889 _file.localSymbols.size();
890 dst->iextdefsym = _symbolTableGlobalsStartIndex;
891 dst->nextdefsym = _file.globalSymbols.size();
892 dst->iundefsym = _symbolTableUndefinesStartIndex;
893 dst->nundefsym = _file.undefinedSymbols.size();
894 dst->tocoff = 0;
895 dst->ntoc = 0;
896 dst->modtaboff = 0;
897 dst->nmodtab = 0;
898 dst->extrefsymoff = 0;
899 dst->nextrefsyms = 0;
900 dst->indirectsymoff = _startOfIndirectSymbols;
901 dst->nindirectsyms = _indirectSymbolTableCount;
902 dst->extreloff = 0;
903 dst->nextrel = 0;
904 dst->locreloff = 0;
905 dst->nlocrel = 0;
906 if (_swap)
907 swapStruct(*dst);
908 lc += sizeof(dysymtab_command);
909 }
910
911 // If main executable, add LC_LOAD_DYLINKER
912 if (_file.fileType == llvm::MachO::MH_EXECUTE) {
913 // Build LC_LOAD_DYLINKER load command.
914 uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1);
915 dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc);
916 dl->cmd = LC_LOAD_DYLINKER;
917 dl->cmdsize = size;
918 dl->name = sizeof(dylinker_command); // offset
919 if (_swap)
920 swapStruct(*dl);
921 memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size());
922 lc[sizeof(dylinker_command)+dyldPath().size()] = '\0';
923 lc += size;
924 }
925
926 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS,
927 // LC_VERSION_MIN_TVOS
928 writeVersionMinLoadCommand(_file, _swap, lc);
929
930 // Add LC_SOURCE_VERSION
931 {
932 // Note, using a temporary here to appease UB as we may not be aligned
933 // enough for a struct containing a uint64_t when emitting a 32-bit binary
934 source_version_command sv;
935 sv.cmd = LC_SOURCE_VERSION;
936 sv.cmdsize = sizeof(source_version_command);
937 sv.version = _file.sourceVersion;
938 if (_swap)
939 swapStruct(sv);
940 memcpy(lc, &sv, sizeof(source_version_command));
941 lc += sizeof(source_version_command);
942 }
943
944 // If main executable, add LC_MAIN.
945 if (_file.fileType == llvm::MachO::MH_EXECUTE) {
946 // Build LC_MAIN load command.
947 // Note, using a temporary here to appease UB as we may not be aligned
948 // enough for a struct containing a uint64_t when emitting a 32-bit binary
949 entry_point_command ep;
950 ep.cmd = LC_MAIN;
951 ep.cmdsize = sizeof(entry_point_command);
952 ep.entryoff = _file.entryAddress - _seg1addr;
953 ep.stacksize = _file.stackSize;
954 if (_swap)
955 swapStruct(ep);
956 memcpy(lc, &ep, sizeof(entry_point_command));
957 lc += sizeof(entry_point_command);
958 }
959
960 // Add LC_LOAD_DYLIB commands
961 for (const DependentDylib &dep : _file.dependentDylibs) {
962 dylib_command* dc = reinterpret_cast<dylib_command*>(lc);
963 uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1);
964 dc->cmd = dep.kind;
965 dc->cmdsize = size;
966 dc->dylib.name = sizeof(dylib_command); // offset
967 // needs to be some constant value different than the one in LC_ID_DYLIB
968 dc->dylib.timestamp = 2;
969 dc->dylib.current_version = dep.currentVersion;
970 dc->dylib.compatibility_version = dep.compatVersion;
971 if (_swap)
972 swapStruct(*dc);
973 memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size());
974 lc[sizeof(dylib_command)+dep.path.size()] = '\0';
975 lc += size;
976 }
977
978 // Add LC_RPATH
979 for (const StringRef &path : _file.rpaths) {
980 rpath_command *rpc = reinterpret_cast<rpath_command *>(lc);
981 uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1);
982 rpc->cmd = LC_RPATH;
983 rpc->cmdsize = size;
984 rpc->path = sizeof(rpath_command); // offset
985 if (_swap)
986 swapStruct(*rpc);
987 memcpy(lc+sizeof(rpath_command), path.begin(), path.size());
988 lc[sizeof(rpath_command)+path.size()] = '\0';
989 lc += size;
990 }
991
992 // Add LC_FUNCTION_STARTS if needed.
993 if (_functionStartsSize != 0) {
994 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
995 dl->cmd = LC_FUNCTION_STARTS;
996 dl->cmdsize = sizeof(linkedit_data_command);
997 dl->dataoff = _startOfFunctionStarts;
998 dl->datasize = _functionStartsSize;
999 if (_swap)
1000 swapStruct(*dl);
1001 lc += sizeof(linkedit_data_command);
1002 }
1003
1004 // Add LC_DATA_IN_CODE if requested.
1005 if (_file.generateDataInCodeLoadCommand) {
1006 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
1007 dl->cmd = LC_DATA_IN_CODE;
1008 dl->cmdsize = sizeof(linkedit_data_command);
1009 dl->dataoff = _startOfDataInCode;
1010 dl->datasize = _dataInCodeSize;
1011 if (_swap)
1012 swapStruct(*dl);
1013 lc += sizeof(linkedit_data_command);
1014 }
1015 }
1016 assert(lc == &_buffer[_endOfLoadCommands]);
1017 return llvm::Error::success();
1018 }
1019
writeSectionContent()1020 void MachOFileLayout::writeSectionContent() {
1021 for (const Section &s : _file.sections) {
1022 // Copy all section content to output buffer.
1023 if (isZeroFillSection(s.type))
1024 continue;
1025 if (s.content.empty())
1026 continue;
1027 uint32_t offset = _sectInfo[&s].fileOffset;
1028 assert(offset >= _endOfLoadCommands);
1029 uint8_t *p = &_buffer[offset];
1030 memcpy(p, &s.content[0], s.content.size());
1031 p += s.content.size();
1032 }
1033 }
1034
writeRelocations()1035 void MachOFileLayout::writeRelocations() {
1036 uint32_t relOffset = _startOfRelocations;
1037 for (Section sect : _file.sections) {
1038 for (Relocation r : sect.relocations) {
1039 any_relocation_info* rb = reinterpret_cast<any_relocation_info*>(
1040 &_buffer[relOffset]);
1041 *rb = packRelocation(r, _swap, _bigEndianArch);
1042 relOffset += sizeof(any_relocation_info);
1043 }
1044 }
1045 }
1046
appendSymbols(const std::vector<Symbol> & symbols,uint32_t & symOffset,uint32_t & strOffset)1047 void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols,
1048 uint32_t &symOffset, uint32_t &strOffset) {
1049 for (const Symbol &sym : symbols) {
1050 if (_is64) {
1051 nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]);
1052 nb->n_strx = strOffset - _startOfSymbolStrings;
1053 nb->n_type = sym.type | sym.scope;
1054 nb->n_sect = sym.sect;
1055 nb->n_desc = sym.desc;
1056 nb->n_value = sym.value;
1057 if (_swap)
1058 swapStruct(*nb);
1059 symOffset += sizeof(nlist_64);
1060 } else {
1061 nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]);
1062 nb->n_strx = strOffset - _startOfSymbolStrings;
1063 nb->n_type = sym.type | sym.scope;
1064 nb->n_sect = sym.sect;
1065 nb->n_desc = sym.desc;
1066 nb->n_value = sym.value;
1067 if (_swap)
1068 swapStruct(*nb);
1069 symOffset += sizeof(nlist);
1070 }
1071 memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size());
1072 strOffset += sym.name.size();
1073 _buffer[strOffset++] ='\0'; // Strings in table have nul terminator.
1074 }
1075 }
1076
writeFunctionStartsInfo()1077 void MachOFileLayout::writeFunctionStartsInfo() {
1078 if (!_functionStartsSize)
1079 return;
1080 memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(),
1081 _functionStartsSize);
1082 }
1083
writeDataInCodeInfo()1084 void MachOFileLayout::writeDataInCodeInfo() {
1085 uint32_t offset = _startOfDataInCode;
1086 for (const DataInCode &entry : _file.dataInCode) {
1087 data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>(
1088 &_buffer[offset]);
1089 dst->offset = entry.offset;
1090 dst->length = entry.length;
1091 dst->kind = entry.kind;
1092 if (_swap)
1093 swapStruct(*dst);
1094 offset += sizeof(data_in_code_entry);
1095 }
1096 }
1097
writeSymbolTable()1098 void MachOFileLayout::writeSymbolTable() {
1099 // Write symbol table and symbol strings in parallel.
1100 uint32_t symOffset = _startOfSymbols;
1101 uint32_t strOffset = _startOfSymbolStrings;
1102 // Reserve n_strx offset of zero to mean no name.
1103 _buffer[strOffset++] = ' ';
1104 _buffer[strOffset++] = '\0';
1105 appendSymbols(_file.stabsSymbols, symOffset, strOffset);
1106 appendSymbols(_file.localSymbols, symOffset, strOffset);
1107 appendSymbols(_file.globalSymbols, symOffset, strOffset);
1108 appendSymbols(_file.undefinedSymbols, symOffset, strOffset);
1109 // Write indirect symbol table array.
1110 uint32_t *indirects = reinterpret_cast<uint32_t*>
1111 (&_buffer[_startOfIndirectSymbols]);
1112 if (_file.fileType == llvm::MachO::MH_OBJECT) {
1113 // Object files have sections in same order as input normalized file.
1114 for (const Section §ion : _file.sections) {
1115 for (uint32_t index : section.indirectSymbols) {
1116 if (_swap)
1117 *indirects++ = llvm::sys::getSwappedBytes(index);
1118 else
1119 *indirects++ = index;
1120 }
1121 }
1122 } else {
1123 // Final linked images must sort sections from normalized file.
1124 for (const Segment &seg : _file.segments) {
1125 SegExtraInfo &segInfo = _segInfo[&seg];
1126 for (const Section *section : segInfo.sections) {
1127 for (uint32_t index : section->indirectSymbols) {
1128 if (_swap)
1129 *indirects++ = llvm::sys::getSwappedBytes(index);
1130 else
1131 *indirects++ = index;
1132 }
1133 }
1134 }
1135 }
1136 }
1137
writeRebaseInfo()1138 void MachOFileLayout::writeRebaseInfo() {
1139 memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size());
1140 }
1141
writeBindingInfo()1142 void MachOFileLayout::writeBindingInfo() {
1143 memcpy(&_buffer[_startOfBindingInfo],
1144 _bindingInfo.bytes(), _bindingInfo.size());
1145 }
1146
writeLazyBindingInfo()1147 void MachOFileLayout::writeLazyBindingInfo() {
1148 memcpy(&_buffer[_startOfLazyBindingInfo],
1149 _lazyBindingInfo.bytes(), _lazyBindingInfo.size());
1150 }
1151
writeExportInfo()1152 void MachOFileLayout::writeExportInfo() {
1153 memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size());
1154 }
1155
buildLinkEditInfo()1156 void MachOFileLayout::buildLinkEditInfo() {
1157 buildRebaseInfo();
1158 buildBindInfo();
1159 buildLazyBindInfo();
1160 buildExportTrie();
1161 computeSymbolTableSizes();
1162 computeFunctionStartsSize();
1163 computeDataInCodeSize();
1164 }
1165
buildSectionRelocations()1166 void MachOFileLayout::buildSectionRelocations() {
1167
1168 }
1169
buildRebaseInfo()1170 void MachOFileLayout::buildRebaseInfo() {
1171 // TODO: compress rebasing info.
1172 for (const RebaseLocation& entry : _file.rebasingInfo) {
1173 _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind);
1174 _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
1175 | entry.segIndex);
1176 _rebaseInfo.append_uleb128(entry.segOffset);
1177 _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1);
1178 }
1179 _rebaseInfo.append_byte(REBASE_OPCODE_DONE);
1180 _rebaseInfo.align(_is64 ? 8 : 4);
1181 }
1182
buildBindInfo()1183 void MachOFileLayout::buildBindInfo() {
1184 // TODO: compress bind info.
1185 uint64_t lastAddend = 0;
1186 int lastOrdinal = 0x80000000;
1187 StringRef lastSymbolName;
1188 BindType lastType = (BindType)0;
1189 Hex32 lastSegOffset = ~0U;
1190 uint8_t lastSegIndex = (uint8_t)~0U;
1191 for (const BindLocation& entry : _file.bindingInfo) {
1192 if (entry.ordinal != lastOrdinal) {
1193 if (entry.ordinal <= 0)
1194 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
1195 (entry.ordinal & BIND_IMMEDIATE_MASK));
1196 else if (entry.ordinal <= BIND_IMMEDIATE_MASK)
1197 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
1198 entry.ordinal);
1199 else {
1200 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
1201 _bindingInfo.append_uleb128(entry.ordinal);
1202 }
1203 lastOrdinal = entry.ordinal;
1204 }
1205
1206 if (lastSymbolName != entry.symbolName) {
1207 _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
1208 _bindingInfo.append_string(entry.symbolName);
1209 lastSymbolName = entry.symbolName;
1210 }
1211
1212 if (lastType != entry.kind) {
1213 _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind);
1214 lastType = entry.kind;
1215 }
1216
1217 if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) {
1218 _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
1219 | entry.segIndex);
1220 _bindingInfo.append_uleb128(entry.segOffset);
1221 lastSegIndex = entry.segIndex;
1222 lastSegOffset = entry.segOffset;
1223 }
1224 if (entry.addend != lastAddend) {
1225 _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB);
1226 _bindingInfo.append_sleb128(entry.addend);
1227 lastAddend = entry.addend;
1228 }
1229 _bindingInfo.append_byte(BIND_OPCODE_DO_BIND);
1230 }
1231 _bindingInfo.append_byte(BIND_OPCODE_DONE);
1232 _bindingInfo.align(_is64 ? 8 : 4);
1233 }
1234
buildLazyBindInfo()1235 void MachOFileLayout::buildLazyBindInfo() {
1236 for (const BindLocation& entry : _file.lazyBindingInfo) {
1237 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
1238 | entry.segIndex);
1239 _lazyBindingInfo.append_uleb128(entry.segOffset);
1240 if (entry.ordinal <= 0)
1241 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
1242 (entry.ordinal & BIND_IMMEDIATE_MASK));
1243 else if (entry.ordinal <= BIND_IMMEDIATE_MASK)
1244 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
1245 entry.ordinal);
1246 else {
1247 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
1248 _lazyBindingInfo.append_uleb128(entry.ordinal);
1249 }
1250 // FIXME: We need to | the opcode here with flags.
1251 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
1252 _lazyBindingInfo.append_string(entry.symbolName);
1253 _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND);
1254 _lazyBindingInfo.append_byte(BIND_OPCODE_DONE);
1255 }
1256 _lazyBindingInfo.align(_is64 ? 8 : 4);
1257 }
1258
addSymbol(const Export & entry,BumpPtrAllocator & allocator,std::vector<TrieNode * > & allNodes)1259 void TrieNode::addSymbol(const Export& entry,
1260 BumpPtrAllocator &allocator,
1261 std::vector<TrieNode*> &allNodes) {
1262 StringRef partialStr = entry.name.drop_front(_cummulativeString.size());
1263 for (TrieEdge &edge : _children) {
1264 StringRef edgeStr = edge._subString;
1265 if (partialStr.startswith(edgeStr)) {
1266 // Already have matching edge, go down that path.
1267 edge._child->addSymbol(entry, allocator, allNodes);
1268 return;
1269 }
1270 // See if string has common prefix with existing edge.
1271 for (int n=edgeStr.size()-1; n > 0; --n) {
1272 if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) {
1273 // Splice in new node: was A -> C, now A -> B -> C
1274 StringRef bNodeStr = edge._child->_cummulativeString;
1275 bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator);
1276 auto *bNode = new (allocator) TrieNode(bNodeStr);
1277 allNodes.push_back(bNode);
1278 TrieNode* cNode = edge._child;
1279 StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator);
1280 StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator);
1281 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs()
1282 << "splice in TrieNode('" << bNodeStr
1283 << "') between edge '"
1284 << abEdgeStr << "' and edge='"
1285 << bcEdgeStr<< "'\n");
1286 TrieEdge& abEdge = edge;
1287 abEdge._subString = abEdgeStr;
1288 abEdge._child = bNode;
1289 auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode);
1290 bNode->_children.insert(bNode->_children.end(), bcEdge);
1291 bNode->addSymbol(entry, allocator, allNodes);
1292 return;
1293 }
1294 }
1295 }
1296 if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1297 assert(entry.otherOffset != 0);
1298 }
1299 if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
1300 assert(entry.otherOffset != 0);
1301 }
1302 // No commonality with any existing child, make a new edge.
1303 auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator));
1304 auto *newEdge = new (allocator) TrieEdge(partialStr, newNode);
1305 _children.insert(_children.end(), newEdge);
1306 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs()
1307 << "new TrieNode('" << entry.name << "') with edge '"
1308 << partialStr << "' from node='"
1309 << _cummulativeString << "'\n");
1310 newNode->_address = entry.offset;
1311 newNode->_flags = entry.flags | entry.kind;
1312 newNode->_other = entry.otherOffset;
1313 if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty())
1314 newNode->_importedName = entry.otherName.copy(allocator);
1315 newNode->_hasExportInfo = true;
1316 allNodes.push_back(newNode);
1317 }
1318
addOrderedNodes(const Export & entry,std::vector<TrieNode * > & orderedNodes)1319 void TrieNode::addOrderedNodes(const Export& entry,
1320 std::vector<TrieNode*> &orderedNodes) {
1321 if (!_ordered) {
1322 orderedNodes.push_back(this);
1323 _ordered = true;
1324 }
1325
1326 StringRef partialStr = entry.name.drop_front(_cummulativeString.size());
1327 for (TrieEdge &edge : _children) {
1328 StringRef edgeStr = edge._subString;
1329 if (partialStr.startswith(edgeStr)) {
1330 // Already have matching edge, go down that path.
1331 edge._child->addOrderedNodes(entry, orderedNodes);
1332 return;
1333 }
1334 }
1335 }
1336
updateOffset(uint32_t & offset)1337 bool TrieNode::updateOffset(uint32_t& offset) {
1338 uint32_t nodeSize = 1; // Length when no export info
1339 if (_hasExportInfo) {
1340 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1341 nodeSize = llvm::getULEB128Size(_flags);
1342 nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal.
1343 nodeSize += _importedName.size();
1344 ++nodeSize; // Trailing zero in imported name.
1345 } else {
1346 nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address);
1347 if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
1348 nodeSize += llvm::getULEB128Size(_other);
1349 }
1350 // Overall node size so far is uleb128 of export info + actual export info.
1351 nodeSize += llvm::getULEB128Size(nodeSize);
1352 }
1353 // Compute size of all child edges.
1354 ++nodeSize; // Byte for number of children.
1355 for (TrieEdge &edge : _children) {
1356 nodeSize += edge._subString.size() + 1 // String length.
1357 + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len.
1358 }
1359 // On input, 'offset' is new prefered location for this node.
1360 bool result = (_trieOffset != offset);
1361 // Store new location in node object for use by parents.
1362 _trieOffset = offset;
1363 // Update offset for next iteration.
1364 offset += nodeSize;
1365 // Return true if _trieOffset was changed.
1366 return result;
1367 }
1368
appendToByteBuffer(ByteBuffer & out)1369 void TrieNode::appendToByteBuffer(ByteBuffer &out) {
1370 if (_hasExportInfo) {
1371 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1372 if (!_importedName.empty()) {
1373 // nodes with re-export info: size, flags, ordinal, import-name
1374 uint32_t nodeSize = llvm::getULEB128Size(_flags)
1375 + llvm::getULEB128Size(_other)
1376 + _importedName.size() + 1;
1377 assert(nodeSize < 256);
1378 out.append_byte(nodeSize);
1379 out.append_uleb128(_flags);
1380 out.append_uleb128(_other);
1381 out.append_string(_importedName);
1382 } else {
1383 // nodes without re-export info: size, flags, ordinal, empty-string
1384 uint32_t nodeSize = llvm::getULEB128Size(_flags)
1385 + llvm::getULEB128Size(_other) + 1;
1386 assert(nodeSize < 256);
1387 out.append_byte(nodeSize);
1388 out.append_uleb128(_flags);
1389 out.append_uleb128(_other);
1390 out.append_byte(0);
1391 }
1392 } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
1393 // Nodes with export info: size, flags, address, other
1394 uint32_t nodeSize = llvm::getULEB128Size(_flags)
1395 + llvm::getULEB128Size(_address)
1396 + llvm::getULEB128Size(_other);
1397 assert(nodeSize < 256);
1398 out.append_byte(nodeSize);
1399 out.append_uleb128(_flags);
1400 out.append_uleb128(_address);
1401 out.append_uleb128(_other);
1402 } else {
1403 // Nodes with export info: size, flags, address
1404 uint32_t nodeSize = llvm::getULEB128Size(_flags)
1405 + llvm::getULEB128Size(_address);
1406 assert(nodeSize < 256);
1407 out.append_byte(nodeSize);
1408 out.append_uleb128(_flags);
1409 out.append_uleb128(_address);
1410 }
1411 } else {
1412 // Node with no export info.
1413 uint32_t nodeSize = 0;
1414 out.append_byte(nodeSize);
1415 }
1416 // Add number of children.
1417 assert(_children.size() < 256);
1418 out.append_byte(_children.size());
1419 // Append each child edge substring and node offset.
1420 for (TrieEdge &edge : _children) {
1421 out.append_string(edge._subString);
1422 out.append_uleb128(edge._child->_trieOffset);
1423 }
1424 }
1425
buildExportTrie()1426 void MachOFileLayout::buildExportTrie() {
1427 if (_file.exportInfo.empty())
1428 return;
1429
1430 // For all temporary strings and objects used building trie.
1431 BumpPtrAllocator allocator;
1432
1433 // Build trie of all exported symbols.
1434 auto *rootNode = new (allocator) TrieNode(StringRef());
1435 std::vector<TrieNode*> allNodes;
1436 allNodes.reserve(_file.exportInfo.size()*2);
1437 allNodes.push_back(rootNode);
1438 for (const Export& entry : _file.exportInfo) {
1439 rootNode->addSymbol(entry, allocator, allNodes);
1440 }
1441
1442 std::vector<TrieNode*> orderedNodes;
1443 orderedNodes.reserve(allNodes.size());
1444
1445 for (const Export& entry : _file.exportInfo)
1446 rootNode->addOrderedNodes(entry, orderedNodes);
1447
1448 // Assign each node in the vector an offset in the trie stream, iterating
1449 // until all uleb128 sizes have stabilized.
1450 bool more;
1451 do {
1452 uint32_t offset = 0;
1453 more = false;
1454 for (TrieNode* node : orderedNodes) {
1455 if (node->updateOffset(offset))
1456 more = true;
1457 }
1458 } while (more);
1459
1460 // Serialize trie to ByteBuffer.
1461 for (TrieNode* node : orderedNodes) {
1462 node->appendToByteBuffer(_exportTrie);
1463 }
1464 _exportTrie.align(_is64 ? 8 : 4);
1465 }
1466
computeSymbolTableSizes()1467 void MachOFileLayout::computeSymbolTableSizes() {
1468 // MachO symbol tables have three ranges: locals, globals, and undefines
1469 const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist));
1470 _symbolTableSize = nlistSize * (_file.stabsSymbols.size()
1471 + _file.localSymbols.size()
1472 + _file.globalSymbols.size()
1473 + _file.undefinedSymbols.size());
1474 // Always reserve 1-byte for the empty string and 1-byte for its terminator.
1475 _symbolStringPoolSize = 2;
1476 for (const Symbol &sym : _file.stabsSymbols) {
1477 _symbolStringPoolSize += (sym.name.size()+1);
1478 }
1479 for (const Symbol &sym : _file.localSymbols) {
1480 _symbolStringPoolSize += (sym.name.size()+1);
1481 }
1482 for (const Symbol &sym : _file.globalSymbols) {
1483 _symbolStringPoolSize += (sym.name.size()+1);
1484 }
1485 for (const Symbol &sym : _file.undefinedSymbols) {
1486 _symbolStringPoolSize += (sym.name.size()+1);
1487 }
1488 _symbolTableLocalsStartIndex = 0;
1489 _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() +
1490 _file.localSymbols.size();
1491 _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex
1492 + _file.globalSymbols.size();
1493
1494 _indirectSymbolTableCount = 0;
1495 for (const Section § : _file.sections) {
1496 _indirectSymbolTableCount += sect.indirectSymbols.size();
1497 }
1498 }
1499
computeFunctionStartsSize()1500 void MachOFileLayout::computeFunctionStartsSize() {
1501 _functionStartsSize = _file.functionStarts.size();
1502 }
1503
computeDataInCodeSize()1504 void MachOFileLayout::computeDataInCodeSize() {
1505 _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry);
1506 }
1507
writeLinkEditContent()1508 void MachOFileLayout::writeLinkEditContent() {
1509 if (_file.fileType == llvm::MachO::MH_OBJECT) {
1510 writeRelocations();
1511 writeFunctionStartsInfo();
1512 writeDataInCodeInfo();
1513 writeSymbolTable();
1514 } else {
1515 writeRebaseInfo();
1516 writeBindingInfo();
1517 writeLazyBindingInfo();
1518 // TODO: add weak binding info
1519 writeExportInfo();
1520 writeFunctionStartsInfo();
1521 writeDataInCodeInfo();
1522 writeSymbolTable();
1523 }
1524 }
1525
writeBinary(StringRef path)1526 llvm::Error MachOFileLayout::writeBinary(StringRef path) {
1527 // Check for pending error from constructor.
1528 if (_ec)
1529 return llvm::errorCodeToError(_ec);
1530 // Create FileOutputBuffer with calculated size.
1531 unsigned flags = 0;
1532 if (_file.fileType != llvm::MachO::MH_OBJECT)
1533 flags = llvm::FileOutputBuffer::F_executable;
1534 Expected<std::unique_ptr<llvm::FileOutputBuffer>> fobOrErr =
1535 llvm::FileOutputBuffer::create(path, size(), flags);
1536 if (Error E = fobOrErr.takeError())
1537 return E;
1538 std::unique_ptr<llvm::FileOutputBuffer> &fob = *fobOrErr;
1539 // Write content.
1540 _buffer = fob->getBufferStart();
1541 writeMachHeader();
1542 if (auto ec = writeLoadCommands())
1543 return ec;
1544 writeSectionContent();
1545 writeLinkEditContent();
1546 if (Error E = fob->commit())
1547 return E;
1548
1549 return llvm::Error::success();
1550 }
1551
1552 /// Takes in-memory normalized view and writes a mach-o object file.
writeBinary(const NormalizedFile & file,StringRef path)1553 llvm::Error writeBinary(const NormalizedFile &file, StringRef path) {
1554 MachOFileLayout layout(file, false);
1555 return layout.writeBinary(path);
1556 }
1557
1558 } // namespace normalized
1559 } // namespace mach_o
1560 } // namespace lld
1561