//===- llvm/MC/MCMachObjectWriter.h - Mach Object Writer --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCMACHOBJECTWRITER_H
10 #define LLVM_MC_MCMACHOBJECTWRITER_H
11 
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCExpr.h"
16 #include "llvm/MC/MCObjectWriter.h"
17 #include "llvm/MC/MCSection.h"
18 #include "llvm/MC/StringTableBuilder.h"
19 #include "llvm/Support/EndianStream.h"
20 #include <cstdint>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
25 namespace llvm {
26 
// Forward declaration: MCMachObjectTargetWriter::recordRelocation takes a
// MachObjectWriter pointer before the class itself is defined further down.
class MachObjectWriter;
28 
29 class MCMachObjectTargetWriter : public MCObjectTargetWriter {
30   const unsigned Is64Bit : 1;
31   const uint32_t CPUType;
32 protected:
33   uint32_t CPUSubtype;
34 public:
35   unsigned LocalDifference_RIT;
36 
37 protected:
38   MCMachObjectTargetWriter(bool Is64Bit_, uint32_t CPUType_,
39                            uint32_t CPUSubtype_);
40 
setLocalDifferenceRelocationType(unsigned Type)41   void setLocalDifferenceRelocationType(unsigned Type) {
42     LocalDifference_RIT = Type;
43   }
44 
45 public:
46   virtual ~MCMachObjectTargetWriter();
47 
getFormat()48   Triple::ObjectFormatType getFormat() const override { return Triple::MachO; }
classof(const MCObjectTargetWriter * W)49   static bool classof(const MCObjectTargetWriter *W) {
50     return W->getFormat() == Triple::MachO;
51   }
52 
53   /// \name Lifetime Management
54   /// @{
55 
reset()56   virtual void reset() {}
57 
58   /// @}
59 
60   /// \name Accessors
61   /// @{
62 
is64Bit()63   bool is64Bit() const { return Is64Bit; }
getCPUType()64   uint32_t getCPUType() const { return CPUType; }
getCPUSubtype()65   uint32_t getCPUSubtype() const { return CPUSubtype; }
getLocalDifferenceRelocationType()66   unsigned getLocalDifferenceRelocationType() const {
67     return LocalDifference_RIT;
68   }
69 
70   /// @}
71 
72   /// \name API
73   /// @{
74 
75   virtual void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
76                                 const MCAsmLayout &Layout,
77                                 const MCFragment *Fragment,
78                                 const MCFixup &Fixup, MCValue Target,
79                                 uint64_t &FixedValue) = 0;
80 
81   /// @}
82 };
83 
84 class MachObjectWriter : public MCObjectWriter {
85   /// Helper struct for containing some precomputed information on symbols.
86   struct MachSymbolData {
87     const MCSymbol *Symbol;
88     uint64_t StringIndex;
89     uint8_t SectionIndex;
90 
91     // Support lexicographic sorting.
92     bool operator<(const MachSymbolData &RHS) const;
93   };
94 
95   /// The target specific Mach-O writer instance.
96   std::unique_ptr<MCMachObjectTargetWriter> TargetObjectWriter;
97 
98   /// \name Relocation Data
99   /// @{
100 
101   struct RelAndSymbol {
102     const MCSymbol *Sym;
103     MachO::any_relocation_info MRE;
RelAndSymbolRelAndSymbol104     RelAndSymbol(const MCSymbol *Sym, const MachO::any_relocation_info &MRE)
105         : Sym(Sym), MRE(MRE) {}
106   };
107 
108   DenseMap<const MCSection *, std::vector<RelAndSymbol>> Relocations;
109   DenseMap<const MCSection *, unsigned> IndirectSymBase;
110 
111   SectionAddrMap SectionAddress;
112 
113   /// @}
114   /// \name Symbol Table Data
115   /// @{
116 
117   StringTableBuilder StringTable;
118   std::vector<MachSymbolData> LocalSymbolData;
119   std::vector<MachSymbolData> ExternalSymbolData;
120   std::vector<MachSymbolData> UndefinedSymbolData;
121 
122   /// @}
123 
124   MachSymbolData *findSymbolData(const MCSymbol &Sym);
125 
126   void writeWithPadding(StringRef Str, uint64_t Size);
127 
128 public:
MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,raw_pwrite_stream & OS,bool IsLittleEndian)129   MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
130                    raw_pwrite_stream &OS, bool IsLittleEndian)
131       : TargetObjectWriter(std::move(MOTW)),
132         StringTable(TargetObjectWriter->is64Bit() ? StringTableBuilder::MachO64
133                                                   : StringTableBuilder::MachO),
134         W(OS, IsLittleEndian ? support::little : support::big) {}
135 
136   support::endian::Writer W;
137 
138   const MCSymbol &findAliasedSymbol(const MCSymbol &Sym) const;
139 
140   /// \name Lifetime management Methods
141   /// @{
142 
143   void reset() override;
144 
145   /// @}
146 
147   /// \name Utility Methods
148   /// @{
149 
150   bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
151 
getSectionAddressMap()152   SectionAddrMap &getSectionAddressMap() { return SectionAddress; }
153 
getSectionAddress(const MCSection * Sec)154   uint64_t getSectionAddress(const MCSection *Sec) const {
155     return SectionAddress.lookup(Sec);
156   }
157   uint64_t getSymbolAddress(const MCSymbol &S, const MCAsmLayout &Layout) const;
158 
159   uint64_t getFragmentAddress(const MCFragment *Fragment,
160                               const MCAsmLayout &Layout) const;
161 
162   uint64_t getPaddingSize(const MCSection *SD, const MCAsmLayout &Layout) const;
163 
164   bool doesSymbolRequireExternRelocation(const MCSymbol &S);
165 
166   /// @}
167 
168   /// \name Target Writer Proxy Accessors
169   /// @{
170 
is64Bit()171   bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
isX86_64()172   bool isX86_64() const {
173     uint32_t CPUType = TargetObjectWriter->getCPUType();
174     return CPUType == MachO::CPU_TYPE_X86_64;
175   }
176 
177   /// @}
178 
179   void writeHeader(MachO::HeaderFileType Type, unsigned NumLoadCommands,
180                    unsigned LoadCommandsSize, bool SubsectionsViaSymbols);
181 
182   /// Write a segment load command.
183   ///
184   /// \param NumSections The number of sections in this segment.
185   /// \param SectionDataSize The total size of the sections.
186   void writeSegmentLoadCommand(StringRef Name, unsigned NumSections,
187                                uint64_t VMAddr, uint64_t VMSize,
188                                uint64_t SectionDataStartOffset,
189                                uint64_t SectionDataSize, uint32_t MaxProt,
190                                uint32_t InitProt);
191 
192   void writeSection(const MCAsmLayout &Layout, const MCSection &Sec,
193                     uint64_t VMAddr, uint64_t FileOffset, unsigned Flags,
194                     uint64_t RelocationsStart, unsigned NumRelocations);
195 
196   void writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
197                               uint32_t StringTableOffset,
198                               uint32_t StringTableSize);
199 
200   void writeDysymtabLoadCommand(
201       uint32_t FirstLocalSymbol, uint32_t NumLocalSymbols,
202       uint32_t FirstExternalSymbol, uint32_t NumExternalSymbols,
203       uint32_t FirstUndefinedSymbol, uint32_t NumUndefinedSymbols,
204       uint32_t IndirectSymbolOffset, uint32_t NumIndirectSymbols);
205 
206   void writeNlist(MachSymbolData &MSD, const MCAsmLayout &Layout);
207 
208   void writeLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset,
209                                 uint32_t DataSize);
210 
211   void writeLinkerOptionsLoadCommand(const std::vector<std::string> &Options);
212 
213   // FIXME: We really need to improve the relocation validation. Basically, we
214   // want to implement a separate computation which evaluates the relocation
215   // entry as the linker would, and verifies that the resultant fixup value is
216   // exactly what the encoder wanted. This will catch several classes of
217   // problems:
218   //
219   //  - Relocation entry bugs, the two algorithms are unlikely to have the same
220   //    exact bug.
221   //
222   //  - Relaxation issues, where we forget to relax something.
223   //
224   //  - Input errors, where something cannot be correctly encoded. 'as' allows
225   //    these through in many cases.
226 
227   // Add a relocation to be output in the object file. At the time this is
228   // called, the symbol indexes are not know, so if the relocation refers
229   // to a symbol it should be passed as \p RelSymbol so that it can be updated
230   // afterwards. If the relocation doesn't refer to a symbol, nullptr should be
231   // used.
addRelocation(const MCSymbol * RelSymbol,const MCSection * Sec,MachO::any_relocation_info & MRE)232   void addRelocation(const MCSymbol *RelSymbol, const MCSection *Sec,
233                      MachO::any_relocation_info &MRE) {
234     RelAndSymbol P(RelSymbol, MRE);
235     Relocations[Sec].push_back(P);
236   }
237 
238   void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
239                         const MCFragment *Fragment, const MCFixup &Fixup,
240                         MCValue Target, uint64_t &FixedValue) override;
241 
242   void bindIndirectSymbols(MCAssembler &Asm);
243 
244   /// Compute the symbol table data.
245   void computeSymbolTable(MCAssembler &Asm,
246                           std::vector<MachSymbolData> &LocalSymbolData,
247                           std::vector<MachSymbolData> &ExternalSymbolData,
248                           std::vector<MachSymbolData> &UndefinedSymbolData);
249 
250   void computeSectionAddresses(const MCAssembler &Asm,
251                                const MCAsmLayout &Layout);
252 
253   void executePostLayoutBinding(MCAssembler &Asm,
254                                 const MCAsmLayout &Layout) override;
255 
256   bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
257                                               const MCSymbol &A,
258                                               const MCSymbol &B,
259                                               bool InSet) const override;
260 
261   bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
262                                               const MCSymbol &SymA,
263                                               const MCFragment &FB, bool InSet,
264                                               bool IsPCRel) const override;
265 
266   uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
267 };
268 
269 /// Construct a new Mach-O writer instance.
270 ///
271 /// This routine takes ownership of the target writer subclass.
272 ///
273 /// \param MOTW - The target specific Mach-O writer subclass.
274 /// \param OS - The stream to write to.
275 /// \returns The constructed object writer.
276 std::unique_ptr<MCObjectWriter>
277 createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
278                        raw_pwrite_stream &OS, bool IsLittleEndian);
279 
280 } // end namespace llvm
281 
282 #endif // LLVM_MC_MCMACHOBJECTWRITER_H
283