1 //===- InputSection.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLD_MACHO_INPUT_SECTION_H
10 #define LLD_MACHO_INPUT_SECTION_H
11
12 #include "Config.h"
13 #include "Relocations.h"
14
15 #include "lld/Common/LLVM.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/CachedHashString.h"
20 #include "llvm/BinaryFormat/MachO.h"
21
22 namespace lld {
23 namespace macho {
24
25 class InputFile;
26 class OutputSection;
27 class Defined;
28
29 class InputSection {
30 public:
31 enum Kind {
32 ConcatKind,
33 CStringLiteralKind,
34 WordLiteralKind,
35 };
36
kind()37 Kind kind() const { return shared->sectionKind; }
38 virtual ~InputSection() = default;
getSize()39 virtual uint64_t getSize() const { return data.size(); }
getFile()40 InputFile *getFile() const { return shared->file; }
getName()41 StringRef getName() const { return shared->name; }
getSegName()42 StringRef getSegName() const { return shared->segname; }
getFlags()43 uint32_t getFlags() const { return shared->flags; }
44 uint64_t getFileSize() const;
45 // Translates \p off -- an offset relative to this InputSection -- into an
46 // offset from the beginning of its parent OutputSection.
47 virtual uint64_t getOffset(uint64_t off) const = 0;
48 // The offset from the beginning of the file.
49 uint64_t getVA(uint64_t off) const;
50 // Whether the data at \p off in this InputSection is live.
51 virtual bool isLive(uint64_t off) const = 0;
52 virtual void markLive(uint64_t off) = 0;
canonical()53 virtual InputSection *canonical() { return this; }
54
55 OutputSection *parent = nullptr;
56
57 uint32_t align = 1;
58 uint32_t callSiteCount : 31;
59 // is address assigned?
60 uint32_t isFinal : 1;
61
62 ArrayRef<uint8_t> data;
63 std::vector<Reloc> relocs;
64
65 protected:
66 // The fields in this struct are immutable. Since we create a lot of
67 // InputSections with identical values for them (due to
68 // .subsections_via_symbols), factoring them out into a shared struct reduces
69 // memory consumption and makes copying cheaper.
70 struct Shared {
71 InputFile *file;
72 StringRef name;
73 StringRef segname;
74 uint32_t flags;
75 Kind sectionKind;
SharedShared76 Shared(InputFile *file, StringRef name, StringRef segname, uint32_t flags,
77 Kind kind)
78 : file(file), name(name), segname(segname), flags(flags),
79 sectionKind(kind) {}
80 };
81
InputSection(Kind kind,StringRef segname,StringRef name)82 InputSection(Kind kind, StringRef segname, StringRef name)
83 : callSiteCount(0), isFinal(false),
84 shared(make<Shared>(nullptr, name, segname, 0, kind)) {}
85
InputSection(Kind kind,StringRef segname,StringRef name,InputFile * file,ArrayRef<uint8_t> data,uint32_t align,uint32_t flags)86 InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file,
87 ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)
88 : align(align), callSiteCount(0), isFinal(false), data(data),
89 shared(make<Shared>(file, name, segname, flags, kind)) {}
90
91 const Shared *const shared;
92 };
93
94 // ConcatInputSections are combined into (Concat)OutputSections through simple
95 // concatenation, in contrast with literal sections which may have their
96 // contents merged before output.
97 class ConcatInputSection final : public InputSection {
98 public:
ConcatInputSection(StringRef segname,StringRef name)99 ConcatInputSection(StringRef segname, StringRef name)
100 : InputSection(ConcatKind, segname, name) {}
101
102 ConcatInputSection(StringRef segname, StringRef name, InputFile *file,
103 ArrayRef<uint8_t> data, uint32_t align = 1,
104 uint32_t flags = 0)
InputSection(ConcatKind,segname,name,file,data,align,flags)105 : InputSection(ConcatKind, segname, name, file, data, align, flags) {}
106
getOffset(uint64_t off)107 uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
getVA()108 uint64_t getVA() const { return InputSection::getVA(0); }
109 // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
isLive(uint64_t off)110 bool isLive(uint64_t off) const override { return live; }
markLive(uint64_t off)111 void markLive(uint64_t off) override { live = true; }
isCoalescedWeak()112 bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }
shouldOmitFromOutput()113 bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
114 bool isHashableForICF() const;
115 void hashForICF();
116 void writeTo(uint8_t *buf);
117
118 void foldIdentical(ConcatInputSection *redundant);
canonical()119 InputSection *canonical() override {
120 return replacement ? replacement : this;
121 }
122
classof(const InputSection * isec)123 static bool classof(const InputSection *isec) {
124 return isec->kind() == ConcatKind;
125 }
126
127 // Points to the surviving section after this one is folded by ICF
128 InputSection *replacement = nullptr;
129 // Equivalence-class ID for ICF
130 uint64_t icfEqClass[2] = {0, 0};
131
132 // With subsections_via_symbols, most symbols have their own InputSection,
133 // and for weak symbols (e.g. from inline functions), only the
134 // InputSection from one translation unit will make it to the output,
135 // while all copies in other translation units are coalesced into the
136 // first and not copied to the output.
137 bool wasCoalesced = false;
138 bool live = !config->deadStrip;
139 // How many symbols refer to this InputSection.
140 uint32_t numRefs = 0;
141 // This variable has two usages. Initially, it represents the input order.
142 // After assignAddresses is called, it represents the offset from the
143 // beginning of the output section this section was assigned to.
144 uint64_t outSecOff = 0;
145 };
146
147 // Verify ConcatInputSection's size on 64-bit builds.
148 static_assert(sizeof(int) != 8 || sizeof(ConcatInputSection) == 112,
149 "Try to minimize ConcatInputSection's size, we create many "
150 "instances of it");
151
152 // Helper functions to make it easy to sprinkle asserts.
153
shouldOmitFromOutput(InputSection * isec)154 inline bool shouldOmitFromOutput(InputSection *isec) {
155 return isa<ConcatInputSection>(isec) &&
156 cast<ConcatInputSection>(isec)->shouldOmitFromOutput();
157 }
158
isCoalescedWeak(InputSection * isec)159 inline bool isCoalescedWeak(InputSection *isec) {
160 return isa<ConcatInputSection>(isec) &&
161 cast<ConcatInputSection>(isec)->isCoalescedWeak();
162 }
163
164 // We allocate a lot of these and binary search on them, so they should be as
165 // compact as possible. Hence the use of 31 rather than 64 bits for the hash.
166 struct StringPiece {
167 // Offset from the start of the containing input section.
168 uint32_t inSecOff;
169 uint32_t live : 1;
170 // Only set if deduplicating literals
171 uint32_t hash : 31;
172 // Offset from the start of the containing output section.
173 uint64_t outSecOff = 0;
174
StringPieceStringPiece175 StringPiece(uint64_t off, uint32_t hash)
176 : inSecOff(off), live(!config->deadStrip), hash(hash) {}
177 };
178
179 static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
180
181 // CStringInputSections are composed of multiple null-terminated string
182 // literals, which we represent using StringPieces. These literals can be
183 // deduplicated and tail-merged, so translating offsets between the input and
184 // outputs sections is more complicated.
185 //
186 // NOTE: One significant difference between LLD and ld64 is that we merge all
187 // cstring literals, even those referenced directly by non-private symbols.
188 // ld64 is more conservative and does not do that. This was mostly done for
189 // implementation simplicity; if we find programs that need the more
190 // conservative behavior we can certainly implement that.
191 class CStringInputSection final : public InputSection {
192 public:
CStringInputSection(StringRef segname,StringRef name,InputFile * file,ArrayRef<uint8_t> data,uint32_t align,uint32_t flags)193 CStringInputSection(StringRef segname, StringRef name, InputFile *file,
194 ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)
195 : InputSection(CStringLiteralKind, segname, name, file, data, align,
196 flags) {}
197 uint64_t getOffset(uint64_t off) const override;
isLive(uint64_t off)198 bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
markLive(uint64_t off)199 void markLive(uint64_t off) override { getStringPiece(off).live = true; }
200 // Find the StringPiece that contains this offset.
201 StringPiece &getStringPiece(uint64_t off);
202 const StringPiece &getStringPiece(uint64_t off) const;
203 // Split at each null byte.
204 void splitIntoPieces();
205
206 LLVM_ATTRIBUTE_ALWAYS_INLINE
getStringRef(size_t i)207 StringRef getStringRef(size_t i) const {
208 size_t begin = pieces[i].inSecOff;
209 size_t end =
210 (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff;
211 return toStringRef(data.slice(begin, end - begin));
212 }
213
214 // Returns i'th piece as a CachedHashStringRef. This function is very hot when
215 // string merging is enabled, so we want to inline.
216 LLVM_ATTRIBUTE_ALWAYS_INLINE
getCachedHashStringRef(size_t i)217 llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
218 assert(config->dedupLiterals);
219 return {getStringRef(i), pieces[i].hash};
220 }
221
classof(const InputSection * isec)222 static bool classof(const InputSection *isec) {
223 return isec->kind() == CStringLiteralKind;
224 }
225
226 std::vector<StringPiece> pieces;
227 };
228
229 class WordLiteralInputSection final : public InputSection {
230 public:
231 WordLiteralInputSection(StringRef segname, StringRef name, InputFile *file,
232 ArrayRef<uint8_t> data, uint32_t align,
233 uint32_t flags);
234 uint64_t getOffset(uint64_t off) const override;
isLive(uint64_t off)235 bool isLive(uint64_t off) const override {
236 return live[off >> power2LiteralSize];
237 }
markLive(uint64_t off)238 void markLive(uint64_t off) override { live[off >> power2LiteralSize] = 1; }
239
classof(const InputSection * isec)240 static bool classof(const InputSection *isec) {
241 return isec->kind() == WordLiteralKind;
242 }
243
244 private:
245 unsigned power2LiteralSize;
246 // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
247 llvm::BitVector live;
248 };
249
sectionType(uint32_t flags)250 inline uint8_t sectionType(uint32_t flags) {
251 return flags & llvm::MachO::SECTION_TYPE;
252 }
253
isZeroFill(uint32_t flags)254 inline bool isZeroFill(uint32_t flags) {
255 return llvm::MachO::isVirtualSection(sectionType(flags));
256 }
257
isThreadLocalVariables(uint32_t flags)258 inline bool isThreadLocalVariables(uint32_t flags) {
259 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES;
260 }
261
262 // These sections contain the data for initializing thread-local variables.
isThreadLocalData(uint32_t flags)263 inline bool isThreadLocalData(uint32_t flags) {
264 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR ||
265 sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;
266 }
267
isDebugSection(uint32_t flags)268 inline bool isDebugSection(uint32_t flags) {
269 return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==
270 llvm::MachO::S_ATTR_DEBUG;
271 }
272
isWordLiteralSection(uint32_t flags)273 inline bool isWordLiteralSection(uint32_t flags) {
274 return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||
275 sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||
276 sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;
277 }
278
279 bool isCodeSection(const InputSection *);
280
281 bool isCfStringSection(const InputSection *);
282
283 extern std::vector<ConcatInputSection *> inputSections;
284
// Section-name string constants. `constexpr` already makes each array const,
// so the element type is spelled plain `char`.
namespace section_names {

constexpr char authGot[] = "__auth_got";
constexpr char authPtr[] = "__auth_ptr";
constexpr char binding[] = "__binding";
constexpr char bitcodeBundle[] = "__bundle";
constexpr char cString[] = "__cstring";
constexpr char cfString[] = "__cfstring";
constexpr char codeSignature[] = "__code_signature";
constexpr char common[] = "__common";
constexpr char compactUnwind[] = "__compact_unwind";
constexpr char data[] = "__data";
constexpr char debugAbbrev[] = "__debug_abbrev";
constexpr char debugInfo[] = "__debug_info";
constexpr char debugStr[] = "__debug_str";
constexpr char ehFrame[] = "__eh_frame";
constexpr char export_[] = "__export";
constexpr char dataInCode[] = "__data_in_code";
constexpr char functionStarts[] = "__func_starts";
constexpr char got[] = "__got";
constexpr char header[] = "__mach_header";
constexpr char indirectSymbolTable[] = "__ind_sym_tab";
constexpr char const_[] = "__const";
constexpr char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr char lazyBinding[] = "__lazy_binding";
constexpr char literals[] = "__literals";
constexpr char moduleInitFunc[] = "__mod_init_func";
constexpr char moduleTermFunc[] = "__mod_term_func";
constexpr char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr char objcCatList[] = "__objc_catlist";
constexpr char objcClassList[] = "__objc_classlist";
constexpr char objcConst[] = "__objc_const";
constexpr char objcImageInfo[] = "__objc_imageinfo";
constexpr char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr char objcProtoList[] = "__objc_protolist";
constexpr char pageZero[] = "__pagezero";
constexpr char pointers[] = "__pointers";
constexpr char rebase[] = "__rebase";
constexpr char staticInit[] = "__StaticInit";
constexpr char stringTable[] = "__string_table";
constexpr char stubHelper[] = "__stub_helper";
constexpr char stubs[] = "__stubs";
constexpr char swift[] = "__swift";
constexpr char symbolTable[] = "__symbol_table";
constexpr char textCoalNt[] = "__textcoal_nt";
constexpr char text[] = "__text";
constexpr char threadPtrs[] = "__thread_ptrs";
constexpr char threadVars[] = "__thread_vars";
constexpr char unwindInfo[] = "__unwind_info";
constexpr char weakBinding[] = "__weak_binding";
constexpr char zeroFill[] = "__zerofill";

} // namespace section_names
339
340 } // namespace macho
341
342 std::string toString(const macho::InputSection *);
343
344 } // namespace lld
345
346 #endif
347