1 //===- InputSection.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_INPUT_SECTION_H
10 #define LLD_MACHO_INPUT_SECTION_H
11 
12 #include "Config.h"
13 #include "Relocations.h"
14 #include "Symbols.h"
15 
16 #include "lld/Common/LLVM.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/CachedHashString.h"
21 #include "llvm/ADT/TinyPtrVector.h"
22 #include "llvm/BinaryFormat/MachO.h"
23 
24 namespace lld {
25 namespace macho {
26 
27 class InputFile;
28 class OutputSection;
29 
30 class InputSection {
31 public:
32   enum Kind {
33     ConcatKind,
34     CStringLiteralKind,
35     WordLiteralKind,
36   };
37 
38   Kind kind() const { return shared->sectionKind; }
39   virtual ~InputSection() = default;
40   virtual uint64_t getSize() const { return data.size(); }
41   virtual bool empty() const { return data.empty(); }
42   InputFile *getFile() const { return shared->file; }
43   StringRef getName() const { return shared->name; }
44   StringRef getSegName() const { return shared->segname; }
45   uint32_t getFlags() const { return shared->flags; }
46   uint64_t getFileSize() const;
47   // Translates \p off -- an offset relative to this InputSection -- into an
48   // offset from the beginning of its parent OutputSection.
49   virtual uint64_t getOffset(uint64_t off) const = 0;
50   // The offset from the beginning of the file.
51   uint64_t getVA(uint64_t off) const;
52   // Whether the data at \p off in this InputSection is live.
53   virtual bool isLive(uint64_t off) const = 0;
54   virtual void markLive(uint64_t off) = 0;
55   virtual InputSection *canonical() { return this; }
56   virtual const InputSection *canonical() const { return this; }
57 
58   OutputSection *parent = nullptr;
59 
60   uint32_t align = 1;
61   // is address assigned?
62   bool isFinal = false;
63 
64   ArrayRef<uint8_t> data;
65   std::vector<Reloc> relocs;
66   // The symbols that belong to this InputSection, sorted by value. With
67   // .subsections_via_symbols, there is typically only one element here.
68   llvm::TinyPtrVector<Defined *> symbols;
69 
70 protected:
71   // The fields in this struct are immutable. Since we create a lot of
72   // InputSections with identical values for them (due to
73   // .subsections_via_symbols), factoring them out into a shared struct reduces
74   // memory consumption and makes copying cheaper.
75   struct Shared {
76     InputFile *file;
77     StringRef name;
78     StringRef segname;
79     uint32_t flags;
80     Kind sectionKind;
81     Shared(InputFile *file, StringRef name, StringRef segname, uint32_t flags,
82            Kind kind)
83         : file(file), name(name), segname(segname), flags(flags),
84           sectionKind(kind) {}
85   };
86 
87   InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file,
88                ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)
89       : align(align), data(data),
90         shared(make<Shared>(file, name, segname, flags, kind)) {}
91 
92   InputSection(const InputSection &rhs)
93       : align(rhs.align), data(rhs.data), shared(rhs.shared) {}
94 
95   const Shared *const shared;
96 };
97 
98 // ConcatInputSections are combined into (Concat)OutputSections through simple
99 // concatenation, in contrast with literal sections which may have their
100 // contents merged before output.
101 class ConcatInputSection final : public InputSection {
102 public:
103   ConcatInputSection(StringRef segname, StringRef name, InputFile *file,
104                      ArrayRef<uint8_t> data, uint32_t align = 1,
105                      uint32_t flags = 0)
106       : InputSection(ConcatKind, segname, name, file, data, align, flags) {}
107 
108   ConcatInputSection(StringRef segname, StringRef name)
109       : ConcatInputSection(segname, name, /*file=*/nullptr,
110                            /*data=*/{},
111                            /*align=*/1, /*flags=*/0) {}
112 
113   uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
114   uint64_t getVA() const { return InputSection::getVA(0); }
115   // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
116   bool isLive(uint64_t off) const override { return live; }
117   void markLive(uint64_t off) override { live = true; }
118   bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
119   bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
120   bool isHashableForICF() const;
121   void hashForICF();
122   void writeTo(uint8_t *buf);
123 
124   void foldIdentical(ConcatInputSection *redundant);
125   ConcatInputSection *canonical() override {
126     return replacement ? replacement : this;
127   }
128   const InputSection *canonical() const override {
129     return replacement ? replacement : this;
130   }
131 
132   static bool classof(const InputSection *isec) {
133     return isec->kind() == ConcatKind;
134   }
135 
136   // Points to the surviving section after this one is folded by ICF
137   ConcatInputSection *replacement = nullptr;
138   // Equivalence-class ID for ICF
139   uint64_t icfEqClass[2] = {0, 0};
140 
141   // With subsections_via_symbols, most symbols have their own InputSection,
142   // and for weak symbols (e.g. from inline functions), only the
143   // InputSection from one translation unit will make it to the output,
144   // while all copies in other translation units are coalesced into the
145   // first and not copied to the output.
146   bool wasCoalesced = false;
147   bool live = !config->deadStrip;
148   bool hasCallSites = false;
149   // This variable has two usages. Initially, it represents the input order.
150   // After assignAddresses is called, it represents the offset from the
151   // beginning of the output section this section was assigned to.
152   uint64_t outSecOff = 0;
153 };
154 
155 // Helper functions to make it easy to sprinkle asserts.
156 
157 inline bool shouldOmitFromOutput(InputSection *isec) {
158   return isa<ConcatInputSection>(isec) &&
159          cast<ConcatInputSection>(isec)->shouldOmitFromOutput();
160 }
161 
162 inline bool isCoalescedWeak(InputSection *isec) {
163   return isa<ConcatInputSection>(isec) &&
164          cast<ConcatInputSection>(isec)->isCoalescedWeak();
165 }
166 
167 // We allocate a lot of these and binary search on them, so they should be as
168 // compact as possible. Hence the use of 31 rather than 64 bits for the hash.
169 struct StringPiece {
170   // Offset from the start of the containing input section.
171   uint32_t inSecOff;
172   uint32_t live : 1;
173   // Only set if deduplicating literals
174   uint32_t hash : 31;
175   // Offset from the start of the containing output section.
176   uint64_t outSecOff = 0;
177 
178   StringPiece(uint64_t off, uint32_t hash)
179       : inSecOff(off), live(!config->deadStrip), hash(hash) {}
180 };
181 
182 static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
183 
184 // CStringInputSections are composed of multiple null-terminated string
185 // literals, which we represent using StringPieces. These literals can be
186 // deduplicated and tail-merged, so translating offsets between the input and
187 // outputs sections is more complicated.
188 //
189 // NOTE: One significant difference between LLD and ld64 is that we merge all
190 // cstring literals, even those referenced directly by non-private symbols.
191 // ld64 is more conservative and does not do that. This was mostly done for
192 // implementation simplicity; if we find programs that need the more
193 // conservative behavior we can certainly implement that.
194 class CStringInputSection final : public InputSection {
195 public:
196   CStringInputSection(StringRef segname, StringRef name, InputFile *file,
197                       ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)
198       : InputSection(CStringLiteralKind, segname, name, file, data, align,
199                      flags) {}
200   uint64_t getOffset(uint64_t off) const override;
201   bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
202   void markLive(uint64_t off) override { getStringPiece(off).live = true; }
203   // Find the StringPiece that contains this offset.
204   StringPiece &getStringPiece(uint64_t off);
205   const StringPiece &getStringPiece(uint64_t off) const;
206   // Split at each null byte.
207   void splitIntoPieces();
208 
209   LLVM_ATTRIBUTE_ALWAYS_INLINE
210   StringRef getStringRef(size_t i) const {
211     size_t begin = pieces[i].inSecOff;
212     size_t end =
213         (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff;
214     return toStringRef(data.slice(begin, end - begin));
215   }
216 
217   // Returns i'th piece as a CachedHashStringRef. This function is very hot when
218   // string merging is enabled, so we want to inline.
219   LLVM_ATTRIBUTE_ALWAYS_INLINE
220   llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
221     assert(config->dedupLiterals);
222     return {getStringRef(i), pieces[i].hash};
223   }
224 
225   static bool classof(const InputSection *isec) {
226     return isec->kind() == CStringLiteralKind;
227   }
228 
229   std::vector<StringPiece> pieces;
230 };
231 
232 class WordLiteralInputSection final : public InputSection {
233 public:
234   WordLiteralInputSection(StringRef segname, StringRef name, InputFile *file,
235                           ArrayRef<uint8_t> data, uint32_t align,
236                           uint32_t flags);
237   uint64_t getOffset(uint64_t off) const override;
238   bool isLive(uint64_t off) const override {
239     return live[off >> power2LiteralSize];
240   }
241   void markLive(uint64_t off) override {
242     live[off >> power2LiteralSize] = true;
243   }
244 
245   static bool classof(const InputSection *isec) {
246     return isec->kind() == WordLiteralKind;
247   }
248 
249 private:
250   unsigned power2LiteralSize;
251   // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
252   llvm::BitVector live;
253 };
254 
255 inline uint8_t sectionType(uint32_t flags) {
256   return flags & llvm::MachO::SECTION_TYPE;
257 }
258 
259 inline bool isZeroFill(uint32_t flags) {
260   return llvm::MachO::isVirtualSection(sectionType(flags));
261 }
262 
263 inline bool isThreadLocalVariables(uint32_t flags) {
264   return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES;
265 }
266 
267 // These sections contain the data for initializing thread-local variables.
268 inline bool isThreadLocalData(uint32_t flags) {
269   return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR ||
270          sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;
271 }
272 
273 inline bool isDebugSection(uint32_t flags) {
274   return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==
275          llvm::MachO::S_ATTR_DEBUG;
276 }
277 
278 inline bool isWordLiteralSection(uint32_t flags) {
279   return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||
280          sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||
281          sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;
282 }
283 
284 bool isCodeSection(const InputSection *);
285 
286 bool isCfStringSection(const InputSection *);
287 
288 extern std::vector<ConcatInputSection *> inputSections;
289 
// Section-name string constants. `constexpr` already makes each array const,
// so the element type is still `const char`.
namespace section_names {

constexpr char authGot[] = "__auth_got";
constexpr char authPtr[] = "__auth_ptr";
constexpr char binding[] = "__binding";
constexpr char bitcodeBundle[] = "__bundle";
constexpr char cString[] = "__cstring";
constexpr char cfString[] = "__cfstring";
constexpr char codeSignature[] = "__code_signature";
constexpr char common[] = "__common";
constexpr char compactUnwind[] = "__compact_unwind";
constexpr char data[] = "__data";
constexpr char debugAbbrev[] = "__debug_abbrev";
constexpr char debugInfo[] = "__debug_info";
constexpr char debugStr[] = "__debug_str";
constexpr char ehFrame[] = "__eh_frame";
constexpr char gccExceptTab[] = "__gcc_except_tab";
constexpr char export_[] = "__export";
constexpr char dataInCode[] = "__data_in_code";
constexpr char functionStarts[] = "__func_starts";
constexpr char got[] = "__got";
constexpr char header[] = "__mach_header";
constexpr char indirectSymbolTable[] = "__ind_sym_tab";
constexpr char const_[] = "__const";
constexpr char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr char lazyBinding[] = "__lazy_binding";
constexpr char literals[] = "__literals";
constexpr char moduleInitFunc[] = "__mod_init_func";
constexpr char moduleTermFunc[] = "__mod_term_func";
constexpr char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr char objcCatList[] = "__objc_catlist";
constexpr char objcClassList[] = "__objc_classlist";
constexpr char objcConst[] = "__objc_const";
constexpr char objcImageInfo[] = "__objc_imageinfo";
constexpr char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr char objcProtoList[] = "__objc_protolist";
constexpr char pageZero[] = "__pagezero";
constexpr char pointers[] = "__pointers";
constexpr char rebase[] = "__rebase";
constexpr char staticInit[] = "__StaticInit";
constexpr char stringTable[] = "__string_table";
constexpr char stubHelper[] = "__stub_helper";
constexpr char stubs[] = "__stubs";
constexpr char swift[] = "__swift";
constexpr char symbolTable[] = "__symbol_table";
constexpr char textCoalNt[] = "__textcoal_nt";
constexpr char text[] = "__text";
constexpr char threadPtrs[] = "__thread_ptrs";
constexpr char threadVars[] = "__thread_vars";
constexpr char unwindInfo[] = "__unwind_info";
constexpr char weakBinding[] = "__weak_binding";
constexpr char zeroFill[] = "__zerofill";

} // namespace section_names
345 
346 } // namespace macho
347 
348 std::string toString(const macho::InputSection *);
349 
350 } // namespace lld
351 
352 #endif
353