1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCObjectWriter.h"
24 #include "llvm/MC/MCSection.h"
25 #include "llvm/MC/MCSectionMachO.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCSymbolMachO.h"
28 #include "llvm/MC/MCValue.h"
29 #include "llvm/Support/Alignment.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cassert>
38 #include <cstdint>
39 #include <string>
40 #include <utility>
41 #include <vector>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "mc"
46 
47 void MachObjectWriter::reset() {
48   Relocations.clear();
49   IndirectSymBase.clear();
50   StringTable.clear();
51   LocalSymbolData.clear();
52   ExternalSymbolData.clear();
53   UndefinedSymbolData.clear();
54   MCObjectWriter::reset();
55 }
56 
57 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
58   // Undefined symbols are always extern.
59   if (S.isUndefined())
60     return true;
61 
62   // References to weak definitions require external relocation entries; the
63   // definition may not always be the one in the same object file.
64   if (cast<MCSymbolMachO>(S).isWeakDefinition())
65     return true;
66 
67   // Otherwise, we can use an internal relocation.
68   return false;
69 }
70 
71 bool MachObjectWriter::
72 MachSymbolData::operator<(const MachSymbolData &RHS) const {
73   return Symbol->getName() < RHS.Symbol->getName();
74 }
75 
76 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
77   const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
78     (MCFixupKind) Kind);
79 
80   return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
81 }
82 
83 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
84                                               const MCAsmLayout &Layout) const {
85   return getSectionAddress(Fragment->getParent()) +
86          Layout.getFragmentOffset(Fragment);
87 }
88 
89 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
90                                             const MCAsmLayout &Layout) const {
91   // If this is a variable, then recursively evaluate now.
92   if (S.isVariable()) {
93     if (const MCConstantExpr *C =
94           dyn_cast<const MCConstantExpr>(S.getVariableValue()))
95       return C->getValue();
96 
97     MCValue Target;
98     if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
99       report_fatal_error("unable to evaluate offset for variable '" +
100                          S.getName() + "'");
101 
102     // Verify that any used symbols are defined.
103     if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
104       report_fatal_error("unable to evaluate offset to undefined symbol '" +
105                          Target.getSymA()->getSymbol().getName() + "'");
106     if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
107       report_fatal_error("unable to evaluate offset to undefined symbol '" +
108                          Target.getSymB()->getSymbol().getName() + "'");
109 
110     uint64_t Address = Target.getConstant();
111     if (Target.getSymA())
112       Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
113     if (Target.getSymB())
114       Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
115     return Address;
116   }
117 
118   return getSectionAddress(S.getFragment()->getParent()) +
119          Layout.getSymbolOffset(S);
120 }
121 
122 uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
123                                           const MCAsmLayout &Layout) const {
124   uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
125   unsigned Next = Sec->getLayoutOrder() + 1;
126   if (Next >= Layout.getSectionOrder().size())
127     return 0;
128 
129   const MCSection &NextSec = *Layout.getSectionOrder()[Next];
130   if (NextSec.isVirtualSection())
131     return 0;
132   return offsetToAlignment(EndAddr, Align(NextSec.getAlignment()));
133 }
134 
135 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
136                                    unsigned NumLoadCommands,
137                                    unsigned LoadCommandsSize,
138                                    bool SubsectionsViaSymbols) {
139   uint32_t Flags = 0;
140 
141   if (SubsectionsViaSymbols)
142     Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
143 
144   // struct mach_header (28 bytes) or
145   // struct mach_header_64 (32 bytes)
146 
147   uint64_t Start = W.OS.tell();
148   (void) Start;
149 
150   W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
151 
152   W.write<uint32_t>(TargetObjectWriter->getCPUType());
153   W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
154 
155   W.write<uint32_t>(Type);
156   W.write<uint32_t>(NumLoadCommands);
157   W.write<uint32_t>(LoadCommandsSize);
158   W.write<uint32_t>(Flags);
159   if (is64Bit())
160     W.write<uint32_t>(0); // reserved
161 
162   assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
163                                            : sizeof(MachO::mach_header)));
164 }
165 
166 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
167   assert(Size >= Str.size());
168   W.OS << Str;
169   W.OS.write_zeros(Size - Str.size());
170 }
171 
172 /// writeSegmentLoadCommand - Write a segment load command.
173 ///
174 /// \param NumSections The number of sections in this segment.
175 /// \param SectionDataSize The total size of the sections.
176 void MachObjectWriter::writeSegmentLoadCommand(
177     StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
178     uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
179     uint32_t InitProt) {
180   // struct segment_command (56 bytes) or
181   // struct segment_command_64 (72 bytes)
182 
183   uint64_t Start = W.OS.tell();
184   (void) Start;
185 
186   unsigned SegmentLoadCommandSize =
187     is64Bit() ? sizeof(MachO::segment_command_64):
188     sizeof(MachO::segment_command);
189   W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
190   W.write<uint32_t>(SegmentLoadCommandSize +
191           NumSections * (is64Bit() ? sizeof(MachO::section_64) :
192                          sizeof(MachO::section)));
193 
194   writeWithPadding(Name, 16);
195   if (is64Bit()) {
196     W.write<uint64_t>(VMAddr);                 // vmaddr
197     W.write<uint64_t>(VMSize); // vmsize
198     W.write<uint64_t>(SectionDataStartOffset); // file offset
199     W.write<uint64_t>(SectionDataSize); // file size
200   } else {
201     W.write<uint32_t>(VMAddr);                 // vmaddr
202     W.write<uint32_t>(VMSize); // vmsize
203     W.write<uint32_t>(SectionDataStartOffset); // file offset
204     W.write<uint32_t>(SectionDataSize); // file size
205   }
206   // maxprot
207   W.write<uint32_t>(MaxProt);
208   // initprot
209   W.write<uint32_t>(InitProt);
210   W.write<uint32_t>(NumSections);
211   W.write<uint32_t>(0); // flags
212 
213   assert(W.OS.tell() - Start == SegmentLoadCommandSize);
214 }
215 
216 void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
217                                     const MCSection &Sec, uint64_t VMAddr,
218                                     uint64_t FileOffset, unsigned Flags,
219                                     uint64_t RelocationsStart,
220                                     unsigned NumRelocations) {
221   uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
222   const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
223 
224   // The offset is unused for virtual sections.
225   if (Section.isVirtualSection()) {
226     assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
227     FileOffset = 0;
228   }
229 
230   // struct section (68 bytes) or
231   // struct section_64 (80 bytes)
232 
233   uint64_t Start = W.OS.tell();
234   (void) Start;
235 
236   writeWithPadding(Section.getName(), 16);
237   writeWithPadding(Section.getSegmentName(), 16);
238   if (is64Bit()) {
239     W.write<uint64_t>(VMAddr);      // address
240     W.write<uint64_t>(SectionSize); // size
241   } else {
242     W.write<uint32_t>(VMAddr);      // address
243     W.write<uint32_t>(SectionSize); // size
244   }
245   W.write<uint32_t>(FileOffset);
246 
247   assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
248   W.write<uint32_t>(Log2_32(Section.getAlignment()));
249   W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
250   W.write<uint32_t>(NumRelocations);
251   W.write<uint32_t>(Flags);
252   W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
253   W.write<uint32_t>(Section.getStubSize()); // reserved2
254   if (is64Bit())
255     W.write<uint32_t>(0); // reserved3
256 
257   assert(W.OS.tell() - Start ==
258          (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
259 }
260 
261 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
262                                               uint32_t NumSymbols,
263                                               uint32_t StringTableOffset,
264                                               uint32_t StringTableSize) {
265   // struct symtab_command (24 bytes)
266 
267   uint64_t Start = W.OS.tell();
268   (void) Start;
269 
270   W.write<uint32_t>(MachO::LC_SYMTAB);
271   W.write<uint32_t>(sizeof(MachO::symtab_command));
272   W.write<uint32_t>(SymbolOffset);
273   W.write<uint32_t>(NumSymbols);
274   W.write<uint32_t>(StringTableOffset);
275   W.write<uint32_t>(StringTableSize);
276 
277   assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
278 }
279 
280 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
281                                                 uint32_t NumLocalSymbols,
282                                                 uint32_t FirstExternalSymbol,
283                                                 uint32_t NumExternalSymbols,
284                                                 uint32_t FirstUndefinedSymbol,
285                                                 uint32_t NumUndefinedSymbols,
286                                                 uint32_t IndirectSymbolOffset,
287                                                 uint32_t NumIndirectSymbols) {
288   // struct dysymtab_command (80 bytes)
289 
290   uint64_t Start = W.OS.tell();
291   (void) Start;
292 
293   W.write<uint32_t>(MachO::LC_DYSYMTAB);
294   W.write<uint32_t>(sizeof(MachO::dysymtab_command));
295   W.write<uint32_t>(FirstLocalSymbol);
296   W.write<uint32_t>(NumLocalSymbols);
297   W.write<uint32_t>(FirstExternalSymbol);
298   W.write<uint32_t>(NumExternalSymbols);
299   W.write<uint32_t>(FirstUndefinedSymbol);
300   W.write<uint32_t>(NumUndefinedSymbols);
301   W.write<uint32_t>(0); // tocoff
302   W.write<uint32_t>(0); // ntoc
303   W.write<uint32_t>(0); // modtaboff
304   W.write<uint32_t>(0); // nmodtab
305   W.write<uint32_t>(0); // extrefsymoff
306   W.write<uint32_t>(0); // nextrefsyms
307   W.write<uint32_t>(IndirectSymbolOffset);
308   W.write<uint32_t>(NumIndirectSymbols);
309   W.write<uint32_t>(0); // extreloff
310   W.write<uint32_t>(0); // nextrel
311   W.write<uint32_t>(0); // locreloff
312   W.write<uint32_t>(0); // nlocrel
313 
314   assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
315 }
316 
317 MachObjectWriter::MachSymbolData *
318 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
319   for (auto *SymbolData :
320        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
321     for (MachSymbolData &Entry : *SymbolData)
322       if (Entry.Symbol == &Sym)
323         return &Entry;
324 
325   return nullptr;
326 }
327 
328 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
329   const MCSymbol *S = &Sym;
330   while (S->isVariable()) {
331     const MCExpr *Value = S->getVariableValue();
332     const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
333     if (!Ref)
334       return *S;
335     S = &Ref->getSymbol();
336   }
337   return *S;
338 }
339 
340 void MachObjectWriter::writeNlist(MachSymbolData &MSD,
341                                   const MCAsmLayout &Layout) {
342   const MCSymbol *Symbol = MSD.Symbol;
343   const MCSymbol &Data = *Symbol;
344   const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
345   uint8_t SectionIndex = MSD.SectionIndex;
346   uint8_t Type = 0;
347   uint64_t Address = 0;
348   bool IsAlias = Symbol != AliasedSymbol;
349 
350   const MCSymbol &OrigSymbol = *Symbol;
351   MachSymbolData *AliaseeInfo;
352   if (IsAlias) {
353     AliaseeInfo = findSymbolData(*AliasedSymbol);
354     if (AliaseeInfo)
355       SectionIndex = AliaseeInfo->SectionIndex;
356     Symbol = AliasedSymbol;
357     // FIXME: Should this update Data as well?
358   }
359 
360   // Set the N_TYPE bits. See <mach-o/nlist.h>.
361   //
362   // FIXME: Are the prebound or indirect fields possible here?
363   if (IsAlias && Symbol->isUndefined())
364     Type = MachO::N_INDR;
365   else if (Symbol->isUndefined())
366     Type = MachO::N_UNDF;
367   else if (Symbol->isAbsolute())
368     Type = MachO::N_ABS;
369   else
370     Type = MachO::N_SECT;
371 
372   // FIXME: Set STAB bits.
373 
374   if (Data.isPrivateExtern())
375     Type |= MachO::N_PEXT;
376 
377   // Set external bit.
378   if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
379     Type |= MachO::N_EXT;
380 
381   // Compute the symbol address.
382   if (IsAlias && Symbol->isUndefined())
383     Address = AliaseeInfo->StringIndex;
384   else if (Symbol->isDefined())
385     Address = getSymbolAddress(OrigSymbol, Layout);
386   else if (Symbol->isCommon()) {
387     // Common symbols are encoded with the size in the address
388     // field, and their alignment in the flags.
389     Address = Symbol->getCommonSize();
390   }
391 
392   // struct nlist (12 bytes)
393 
394   W.write<uint32_t>(MSD.StringIndex);
395   W.OS << char(Type);
396   W.OS << char(SectionIndex);
397 
398   // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
399   // value.
400   bool EncodeAsAltEntry =
401     IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
402   W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
403   if (is64Bit())
404     W.write<uint64_t>(Address);
405   else
406     W.write<uint32_t>(Address);
407 }
408 
409 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
410                                                 uint32_t DataOffset,
411                                                 uint32_t DataSize) {
412   uint64_t Start = W.OS.tell();
413   (void) Start;
414 
415   W.write<uint32_t>(Type);
416   W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
417   W.write<uint32_t>(DataOffset);
418   W.write<uint32_t>(DataSize);
419 
420   assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
421 }
422 
423 static unsigned ComputeLinkerOptionsLoadCommandSize(
424   const std::vector<std::string> &Options, bool is64Bit)
425 {
426   unsigned Size = sizeof(MachO::linker_option_command);
427   for (const std::string &Option : Options)
428     Size += Option.size() + 1;
429   return alignTo(Size, is64Bit ? 8 : 4);
430 }
431 
432 void MachObjectWriter::writeLinkerOptionsLoadCommand(
433   const std::vector<std::string> &Options)
434 {
435   unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
436   uint64_t Start = W.OS.tell();
437   (void) Start;
438 
439   W.write<uint32_t>(MachO::LC_LINKER_OPTION);
440   W.write<uint32_t>(Size);
441   W.write<uint32_t>(Options.size());
442   uint64_t BytesWritten = sizeof(MachO::linker_option_command);
443   for (const std::string &Option : Options) {
444     // Write each string, including the null byte.
445     W.OS << Option << '\0';
446     BytesWritten += Option.size() + 1;
447   }
448 
449   // Pad to a multiple of the pointer size.
450   W.OS.write_zeros(
451       offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
452 
453   assert(W.OS.tell() - Start == Size);
454 }
455 
456 static bool isFixupTargetValid(const MCValue &Target) {
457   // Target is (LHS - RHS + cst).
458   // We don't support the form where LHS is null: -RHS + cst
459   if (!Target.getSymA() && Target.getSymB())
460     return false;
461   return true;
462 }
463 
464 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
465                                         const MCAsmLayout &Layout,
466                                         const MCFragment *Fragment,
467                                         const MCFixup &Fixup, MCValue Target,
468                                         uint64_t &FixedValue) {
469   if (!isFixupTargetValid(Target)) {
470     Asm.getContext().reportError(Fixup.getLoc(),
471                                  "unsupported relocation expression");
472     return;
473   }
474 
475   TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
476                                        Target, FixedValue);
477 }
478 
479 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
480   // This is the point where 'as' creates actual symbols for indirect symbols
481   // (in the following two passes). It would be easier for us to do this sooner
482   // when we see the attribute, but that makes getting the order in the symbol
483   // table much more complicated than it is worth.
484   //
485   // FIXME: Revisit this when the dust settles.
486 
487   // Report errors for use of .indirect_symbol not in a symbol pointer section
488   // or stub section.
489   for (IndirectSymbolData &ISD : llvm::make_range(Asm.indirect_symbol_begin(),
490                                                   Asm.indirect_symbol_end())) {
491     const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section);
492 
493     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
494         Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
495         Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
496         Section.getType() != MachO::S_SYMBOL_STUBS) {
497       MCSymbol &Symbol = *ISD.Symbol;
498       report_fatal_error("indirect symbol '" + Symbol.getName() +
499                          "' not in a symbol pointer or stub section");
500     }
501   }
502 
503   // Bind non-lazy symbol pointers first.
504   unsigned IndirectIndex = 0;
505   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
506          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
507     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
508 
509     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
510         Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
511       continue;
512 
513     // Initialize the section indirect symbol base, if necessary.
514     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
515 
516     Asm.registerSymbol(*it->Symbol);
517   }
518 
519   // Then lazy symbol pointers and symbol stubs.
520   IndirectIndex = 0;
521   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
522          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
523     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
524 
525     if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
526         Section.getType() != MachO::S_SYMBOL_STUBS)
527       continue;
528 
529     // Initialize the section indirect symbol base, if necessary.
530     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
531 
532     // Set the symbol type to undefined lazy, but only on construction.
533     //
534     // FIXME: Do not hardcode.
535     bool Created;
536     Asm.registerSymbol(*it->Symbol, &Created);
537     if (Created)
538       cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
539   }
540 }
541 
542 /// computeSymbolTable - Compute the symbol table data
543 void MachObjectWriter::computeSymbolTable(
544     MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
545     std::vector<MachSymbolData> &ExternalSymbolData,
546     std::vector<MachSymbolData> &UndefinedSymbolData) {
547   // Build section lookup table.
548   DenseMap<const MCSection*, uint8_t> SectionIndexMap;
549   unsigned Index = 1;
550   for (MCAssembler::iterator it = Asm.begin(),
551          ie = Asm.end(); it != ie; ++it, ++Index)
552     SectionIndexMap[&*it] = Index;
553   assert(Index <= 256 && "Too many sections!");
554 
555   // Build the string table.
556   for (const MCSymbol &Symbol : Asm.symbols()) {
557     if (!Asm.isSymbolLinkerVisible(Symbol))
558       continue;
559 
560     StringTable.add(Symbol.getName());
561   }
562   StringTable.finalize();
563 
564   // Build the symbol arrays but only for non-local symbols.
565   //
566   // The particular order that we collect and then sort the symbols is chosen to
567   // match 'as'. Even though it doesn't matter for correctness, this is
568   // important for letting us diff .o files.
569   for (const MCSymbol &Symbol : Asm.symbols()) {
570     // Ignore non-linker visible symbols.
571     if (!Asm.isSymbolLinkerVisible(Symbol))
572       continue;
573 
574     if (!Symbol.isExternal() && !Symbol.isUndefined())
575       continue;
576 
577     MachSymbolData MSD;
578     MSD.Symbol = &Symbol;
579     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
580 
581     if (Symbol.isUndefined()) {
582       MSD.SectionIndex = 0;
583       UndefinedSymbolData.push_back(MSD);
584     } else if (Symbol.isAbsolute()) {
585       MSD.SectionIndex = 0;
586       ExternalSymbolData.push_back(MSD);
587     } else {
588       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
589       assert(MSD.SectionIndex && "Invalid section index!");
590       ExternalSymbolData.push_back(MSD);
591     }
592   }
593 
594   // Now add the data for local symbols.
595   for (const MCSymbol &Symbol : Asm.symbols()) {
596     // Ignore non-linker visible symbols.
597     if (!Asm.isSymbolLinkerVisible(Symbol))
598       continue;
599 
600     if (Symbol.isExternal() || Symbol.isUndefined())
601       continue;
602 
603     MachSymbolData MSD;
604     MSD.Symbol = &Symbol;
605     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
606 
607     if (Symbol.isAbsolute()) {
608       MSD.SectionIndex = 0;
609       LocalSymbolData.push_back(MSD);
610     } else {
611       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
612       assert(MSD.SectionIndex && "Invalid section index!");
613       LocalSymbolData.push_back(MSD);
614     }
615   }
616 
617   // External and undefined symbols are required to be in lexicographic order.
618   llvm::sort(ExternalSymbolData);
619   llvm::sort(UndefinedSymbolData);
620 
621   // Set the symbol indices.
622   Index = 0;
623   for (auto *SymbolData :
624        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
625     for (MachSymbolData &Entry : *SymbolData)
626       Entry.Symbol->setIndex(Index++);
627 
628   for (const MCSection &Section : Asm) {
629     for (RelAndSymbol &Rel : Relocations[&Section]) {
630       if (!Rel.Sym)
631         continue;
632 
633       // Set the Index and the IsExtern bit.
634       unsigned Index = Rel.Sym->getIndex();
635       assert(isInt<24>(Index));
636       if (W.Endian == support::little)
637         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
638       else
639         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
640     }
641   }
642 }
643 
644 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
645                                                const MCAsmLayout &Layout) {
646   uint64_t StartAddress = 0;
647   for (const MCSection *Sec : Layout.getSectionOrder()) {
648     StartAddress = alignTo(StartAddress, Sec->getAlignment());
649     SectionAddress[Sec] = StartAddress;
650     StartAddress += Layout.getSectionAddressSize(Sec);
651 
652     // Explicitly pad the section to match the alignment requirements of the
653     // following one. This is for 'gas' compatibility, it shouldn't
654     /// strictly be necessary.
655     StartAddress += getPaddingSize(Sec, Layout);
656   }
657 }
658 
659 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
660                                                 const MCAsmLayout &Layout) {
661   computeSectionAddresses(Asm, Layout);
662 
663   // Create symbol data for any indirect symbols.
664   bindIndirectSymbols(Asm);
665 }
666 
667 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
668     const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
669     bool InSet) const {
670   // FIXME: We don't handle things like
671   // foo = .
672   // creating atoms.
673   if (A.isVariable() || B.isVariable())
674     return false;
675   return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
676                                                                 InSet);
677 }
678 
679 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
680     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
681     bool InSet, bool IsPCRel) const {
682   if (InSet)
683     return true;
684 
685   // The effective address is
686   //     addr(atom(A)) + offset(A)
687   //   - addr(atom(B)) - offset(B)
688   // and the offsets are not relocatable, so the fixup is fully resolved when
689   //  addr(atom(A)) - addr(atom(B)) == 0.
690   const MCSymbol &SA = findAliasedSymbol(SymA);
691   const MCSection &SecA = SA.getSection();
692   const MCSection &SecB = *FB.getParent();
693 
694   if (IsPCRel) {
695     // The simple (Darwin, except on x86_64) way of dealing with this was to
696     // assume that any reference to a temporary symbol *must* be a temporary
697     // symbol in the same atom, unless the sections differ. Therefore, any PCrel
698     // relocation to a temporary symbol (in the same section) is fully
699     // resolved. This also works in conjunction with absolutized .set, which
700     // requires the compiler to use .set to absolutize the differences between
701     // symbols which the compiler knows to be assembly time constants, so we
702     // don't need to worry about considering symbol differences fully resolved.
703     //
704     // If the file isn't using sub-sections-via-symbols, we can make the
705     // same assumptions about any symbol that we normally make about
706     // assembler locals.
707 
708     bool hasReliableSymbolDifference = isX86_64();
709     if (!hasReliableSymbolDifference) {
710       if (!SA.isInSection() || &SecA != &SecB ||
711           (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
712            Asm.getSubsectionsViaSymbols()))
713         return false;
714       return true;
715     }
716     // For Darwin x86_64, there is one special case when the reference IsPCRel.
717     // If the fragment with the reference does not have a base symbol but meets
718     // the simple way of dealing with this, in that it is a temporary symbol in
719     // the same atom then it is assumed to be fully resolved.  This is needed so
720     // a relocation entry is not created and so the static linker does not
721     // mess up the reference later.
722     else if(!FB.getAtom() &&
723             SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
724       return true;
725     }
726   }
727 
728   // If they are not in the same section, we can't compute the diff.
729   if (&SecA != &SecB)
730     return false;
731 
732   const MCFragment *FA = SA.getFragment();
733 
734   // Bail if the symbol has no fragment.
735   if (!FA)
736     return false;
737 
738   // If the atoms are the same, they are guaranteed to have the same address.
739   if (FA->getAtom() == FB.getAtom())
740     return true;
741 
742   // Otherwise, we can't prove this is fully resolved.
743   return false;
744 }
745 
746 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
747   switch (Type) {
748   case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
749   case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
750   case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
751   case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
752   }
753   llvm_unreachable("Invalid mc version min type");
754 }
755 
756 void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) {
757   MCSection *AddrSigSection =
758       Asm.getContext().getObjectFileInfo()->getAddrSigSection();
759   unsigned Log2Size = is64Bit() ? 3 : 2;
760   for (const MCSymbol *S : getAddrsigSyms()) {
761     MachO::any_relocation_info MRE;
762     MRE.r_word0 = 0;
763     MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28);
764     addRelocation(S, AddrSigSection, MRE);
765   }
766 }
767 
768 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
769                                        const MCAsmLayout &Layout) {
770   uint64_t StartOffset = W.OS.tell();
771 
772   populateAddrSigSection(Asm);
773 
774   // Compute symbol table information and bind symbol indices.
775   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
776                      UndefinedSymbolData);
777 
778   if (!Asm.CGProfile.empty()) {
779     MCSection *CGProfileSection = Asm.getContext().getMachOSection(
780         "__LLVM", "__cg_profile", 0, SectionKind::getMetadata());
781     MCDataFragment *Frag = dyn_cast_or_null<MCDataFragment>(
782         &*CGProfileSection->getFragmentList().begin());
783     assert(Frag && "call graph profile section not reserved");
784     Frag->getContents().clear();
785     raw_svector_ostream OS(Frag->getContents());
786     for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) {
787       uint32_t FromIndex = CGPE.From->getSymbol().getIndex();
788       uint32_t ToIndex = CGPE.To->getSymbol().getIndex();
789       support::endian::write(OS, FromIndex, W.Endian);
790       support::endian::write(OS, ToIndex, W.Endian);
791       support::endian::write(OS, CGPE.Count, W.Endian);
792     }
793   }
794 
795   unsigned NumSections = Asm.size();
796   const MCAssembler::VersionInfoType &VersionInfo =
797     Layout.getAssembler().getVersionInfo();
798 
799   // The section data starts after the header, the segment load command (and
800   // section headers) and the symbol table.
801   unsigned NumLoadCommands = 1;
802   uint64_t LoadCommandsSize = is64Bit() ?
803     sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
804     sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
805 
806   // Add the deployment target version info load command size, if used.
807   if (VersionInfo.Major != 0) {
808     ++NumLoadCommands;
809     if (VersionInfo.EmitBuildVersion)
810       LoadCommandsSize += sizeof(MachO::build_version_command);
811     else
812       LoadCommandsSize += sizeof(MachO::version_min_command);
813   }
814 
815   const MCAssembler::VersionInfoType &TargetVariantVersionInfo =
816       Layout.getAssembler().getDarwinTargetVariantVersionInfo();
817 
818   // Add the target variant version info load command size, if used.
819   if (TargetVariantVersionInfo.Major != 0) {
820     ++NumLoadCommands;
821     assert(TargetVariantVersionInfo.EmitBuildVersion &&
822            "target variant should use build version");
823     LoadCommandsSize += sizeof(MachO::build_version_command);
824   }
825 
826   // Add the data-in-code load command size, if used.
827   unsigned NumDataRegions = Asm.getDataRegions().size();
828   if (NumDataRegions) {
829     ++NumLoadCommands;
830     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
831   }
832 
833   // Add the loh load command size, if used.
834   uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
835   uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
836   if (LOHSize) {
837     ++NumLoadCommands;
838     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
839   }
840 
841   // Add the symbol table load command sizes, if used.
842   unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
843     UndefinedSymbolData.size();
844   if (NumSymbols) {
845     NumLoadCommands += 2;
846     LoadCommandsSize += (sizeof(MachO::symtab_command) +
847                          sizeof(MachO::dysymtab_command));
848   }
849 
850   // Add the linker option load commands sizes.
851   for (const auto &Option : Asm.getLinkerOptions()) {
852     ++NumLoadCommands;
853     LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
854   }
855 
856   // Compute the total size of the section data, as well as its file size and vm
857   // size.
858   uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
859                                sizeof(MachO::mach_header)) + LoadCommandsSize;
860   uint64_t SectionDataSize = 0;
861   uint64_t SectionDataFileSize = 0;
862   uint64_t VMSize = 0;
863   for (const MCSection &Sec : Asm) {
864     uint64_t Address = getSectionAddress(&Sec);
865     uint64_t Size = Layout.getSectionAddressSize(&Sec);
866     uint64_t FileSize = Layout.getSectionFileSize(&Sec);
867     FileSize += getPaddingSize(&Sec, Layout);
868 
869     VMSize = std::max(VMSize, Address + Size);
870 
871     if (Sec.isVirtualSection())
872       continue;
873 
874     SectionDataSize = std::max(SectionDataSize, Address + Size);
875     SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
876   }
877 
878   // The section data is padded to pointer size bytes.
879   //
880   // FIXME: Is this machine dependent?
881   unsigned SectionDataPadding =
882       offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4));
883   SectionDataFileSize += SectionDataPadding;
884 
885   // Write the prolog, starting with the header and load command...
886   writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
887               Asm.getSubsectionsViaSymbols());
888   uint32_t Prot =
889       MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
890   writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
891                           SectionDataSize, Prot, Prot);
892 
893   // ... and then the section headers.
894   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
895   for (const MCSection &Section : Asm) {
896     const auto &Sec = cast<MCSectionMachO>(Section);
897     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
898     unsigned NumRelocs = Relocs.size();
899     uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
900     unsigned Flags = Sec.getTypeAndAttributes();
901     if (Sec.hasInstructions())
902       Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
903     writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
904                  RelocTableEnd, NumRelocs);
905     RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
906   }
907 
908   // Write out the deployment target information, if it's available.
909   auto EmitDeploymentTargetVersion =
910       [&](const MCAssembler::VersionInfoType &VersionInfo) {
911         auto EncodeVersion = [](VersionTuple V) -> uint32_t {
912           assert(!V.empty() && "empty version");
913           unsigned Update = V.getSubminor().value_or(0);
914           unsigned Minor = V.getMinor().value_or(0);
915           assert(Update < 256 && "unencodable update target version");
916           assert(Minor < 256 && "unencodable minor target version");
917           assert(V.getMajor() < 65536 && "unencodable major target version");
918           return Update | (Minor << 8) | (V.getMajor() << 16);
919         };
920         uint32_t EncodedVersion = EncodeVersion(VersionTuple(
921             VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
922         uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
923                                   ? EncodeVersion(VersionInfo.SDKVersion)
924                                   : 0;
925         if (VersionInfo.EmitBuildVersion) {
926           // FIXME: Currently empty tools. Add clang version in the future.
927           W.write<uint32_t>(MachO::LC_BUILD_VERSION);
928           W.write<uint32_t>(sizeof(MachO::build_version_command));
929           W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
930           W.write<uint32_t>(EncodedVersion);
931           W.write<uint32_t>(SDKVersion);
932           W.write<uint32_t>(0); // Empty tools list.
933         } else {
934           MachO::LoadCommandType LCType =
935               getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
936           W.write<uint32_t>(LCType);
937           W.write<uint32_t>(sizeof(MachO::version_min_command));
938           W.write<uint32_t>(EncodedVersion);
939           W.write<uint32_t>(SDKVersion);
940         }
941       };
942   if (VersionInfo.Major != 0)
943     EmitDeploymentTargetVersion(VersionInfo);
944   if (TargetVariantVersionInfo.Major != 0)
945     EmitDeploymentTargetVersion(TargetVariantVersionInfo);
946 
947   // Write the data-in-code load command, if used.
948   uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
949   if (NumDataRegions) {
950     uint64_t DataRegionsOffset = RelocTableEnd;
951     uint64_t DataRegionsSize = NumDataRegions * 8;
952     writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
953                              DataRegionsSize);
954   }
955 
956   // Write the loh load command, if used.
957   uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
958   if (LOHSize)
959     writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
960                              DataInCodeTableEnd, LOHSize);
961 
962   // Write the symbol table load command, if used.
963   if (NumSymbols) {
964     unsigned FirstLocalSymbol = 0;
965     unsigned NumLocalSymbols = LocalSymbolData.size();
966     unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
967     unsigned NumExternalSymbols = ExternalSymbolData.size();
968     unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
969     unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
970     unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
971     unsigned NumSymTabSymbols =
972       NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
973     uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
974     uint64_t IndirectSymbolOffset = 0;
975 
976     // If used, the indirect symbols are written after the section data.
977     if (NumIndirectSymbols)
978       IndirectSymbolOffset = LOHTableEnd;
979 
980     // The symbol table is written after the indirect symbol data.
981     uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
982 
983     // The string table is written after symbol table.
984     uint64_t StringTableOffset =
985       SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
986                                               sizeof(MachO::nlist_64) :
987                                               sizeof(MachO::nlist));
988     writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
989                            StringTableOffset, StringTable.getSize());
990 
991     writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
992                              FirstExternalSymbol, NumExternalSymbols,
993                              FirstUndefinedSymbol, NumUndefinedSymbols,
994                              IndirectSymbolOffset, NumIndirectSymbols);
995   }
996 
997   // Write the linker options load commands.
998   for (const auto &Option : Asm.getLinkerOptions())
999     writeLinkerOptionsLoadCommand(Option);
1000 
1001   // Write the actual section data.
1002   for (const MCSection &Sec : Asm) {
1003     Asm.writeSectionData(W.OS, &Sec, Layout);
1004 
1005     uint64_t Pad = getPaddingSize(&Sec, Layout);
1006     W.OS.write_zeros(Pad);
1007   }
1008 
1009   // Write the extra padding.
1010   W.OS.write_zeros(SectionDataPadding);
1011 
1012   // Write the relocation entries.
1013   for (const MCSection &Sec : Asm) {
1014     // Write the section relocation entries, in reverse order to match 'as'
1015     // (approximately, the exact algorithm is more complicated than this).
1016     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
1017     for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) {
1018       W.write<uint32_t>(Rel.MRE.r_word0);
1019       W.write<uint32_t>(Rel.MRE.r_word1);
1020     }
1021   }
1022 
1023   // Write out the data-in-code region payload, if there is one.
1024   for (MCAssembler::const_data_region_iterator
1025          it = Asm.data_region_begin(), ie = Asm.data_region_end();
1026          it != ie; ++it) {
1027     const DataRegionData *Data = &(*it);
1028     uint64_t Start = getSymbolAddress(*Data->Start, Layout);
1029     uint64_t End;
1030     if (Data->End)
1031       End = getSymbolAddress(*Data->End, Layout);
1032     else
1033       report_fatal_error("Data region not terminated");
1034 
1035     LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
1036                       << "  start: " << Start << "(" << Data->Start->getName()
1037                       << ")"
1038                       << "  end: " << End << "(" << Data->End->getName() << ")"
1039                       << "  size: " << End - Start << "\n");
1040     W.write<uint32_t>(Start);
1041     W.write<uint16_t>(End - Start);
1042     W.write<uint16_t>(Data->Kind);
1043   }
1044 
1045   // Write out the loh commands, if there is one.
1046   if (LOHSize) {
1047 #ifndef NDEBUG
1048     unsigned Start = W.OS.tell();
1049 #endif
1050     Asm.getLOHContainer().emit(*this, Layout);
1051     // Pad to a multiple of the pointer size.
1052     W.OS.write_zeros(
1053         offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
1054     assert(W.OS.tell() - Start == LOHSize);
1055   }
1056 
1057   // Write the symbol table data, if used.
1058   if (NumSymbols) {
1059     // Write the indirect symbol entries.
1060     for (MCAssembler::const_indirect_symbol_iterator
1061            it = Asm.indirect_symbol_begin(),
1062            ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1063       // Indirect symbols in the non-lazy symbol pointer section have some
1064       // special handling.
1065       const MCSectionMachO &Section =
1066           static_cast<const MCSectionMachO &>(*it->Section);
1067       if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1068         // If this symbol is defined and internal, mark it as such.
1069         if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
1070           uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1071           if (it->Symbol->isAbsolute())
1072             Flags |= MachO::INDIRECT_SYMBOL_ABS;
1073           W.write<uint32_t>(Flags);
1074           continue;
1075         }
1076       }
1077 
1078       W.write<uint32_t>(it->Symbol->getIndex());
1079     }
1080 
1081     // FIXME: Check that offsets match computed ones.
1082 
1083     // Write the symbol table entries.
1084     for (auto *SymbolData :
1085          {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1086       for (MachSymbolData &Entry : *SymbolData)
1087         writeNlist(Entry, Layout);
1088 
1089     // Write the string table.
1090     StringTable.write(W.OS);
1091   }
1092 
1093   return W.OS.tell() - StartOffset;
1094 }
1095 
1096 std::unique_ptr<MCObjectWriter>
1097 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1098                              raw_pwrite_stream &OS, bool IsLittleEndian) {
1099   return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1100                                              IsLittleEndian);
1101 }
1102