1 //===- InputSection.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLD_MACHO_INPUT_SECTION_H
10 #define LLD_MACHO_INPUT_SECTION_H
11
12 #include "Config.h"
13 #include "Relocations.h"
14 #include "Symbols.h"
15
16 #include "lld/Common/LLVM.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/CachedHashString.h"
21 #include "llvm/ADT/TinyPtrVector.h"
22 #include "llvm/BinaryFormat/MachO.h"
23
24 namespace lld {
25 namespace macho {
26
27 class InputFile;
28 class OutputSection;
29
30 class InputSection {
31 public:
32 enum Kind : uint8_t {
33 ConcatKind,
34 CStringLiteralKind,
35 WordLiteralKind,
36 };
37
kind()38 Kind kind() const { return sectionKind; }
39 virtual ~InputSection() = default;
getSize()40 virtual uint64_t getSize() const { return data.size(); }
empty()41 virtual bool empty() const { return data.empty(); }
getFile()42 InputFile *getFile() const { return section.file; }
getName()43 StringRef getName() const { return section.name; }
getSegName()44 StringRef getSegName() const { return section.segname; }
getFlags()45 uint32_t getFlags() const { return section.flags; }
46 uint64_t getFileSize() const;
47 // Translates \p off -- an offset relative to this InputSection -- into an
48 // offset from the beginning of its parent OutputSection.
49 virtual uint64_t getOffset(uint64_t off) const = 0;
50 // The offset from the beginning of the file.
51 uint64_t getVA(uint64_t off) const;
52 // Return a user-friendly string for use in diagnostics.
53 // Format: /path/to/object.o:(symbol _func+0x123)
54 std::string getLocation(uint64_t off) const;
55 // Return the source line corresponding to an address, or the empty string.
56 // Format: Source.cpp:123 (/path/to/Source.cpp:123)
57 std::string getSourceLocation(uint64_t off) const;
58 // Whether the data at \p off in this InputSection is live.
59 virtual bool isLive(uint64_t off) const = 0;
60 virtual void markLive(uint64_t off) = 0;
canonical()61 virtual InputSection *canonical() { return this; }
canonical()62 virtual const InputSection *canonical() const { return this; }
63
64 protected:
InputSection(Kind kind,const Section & section,ArrayRef<uint8_t> data,uint32_t align)65 InputSection(Kind kind, const Section §ion, ArrayRef<uint8_t> data,
66 uint32_t align)
67 : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align),
68 data(data), section(section) {}
69
InputSection(const InputSection & rhs)70 InputSection(const InputSection &rhs)
71 : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),
72 align(rhs.align), data(rhs.data), section(rhs.section) {}
73
74 Kind sectionKind;
75
76 public:
77 // is address assigned?
78 bool isFinal = false;
79 // keep the address of the symbol(s) in this section unique in the final
80 // binary ?
81 bool keepUnique : 1;
82 // Does this section have symbols at offsets other than zero? (NOTE: only
83 // applies to ConcatInputSections.)
84 bool hasAltEntry : 1;
85 uint32_t align = 1;
86
87 OutputSection *parent = nullptr;
88 ArrayRef<uint8_t> data;
89 std::vector<Reloc> relocs;
90 // The symbols that belong to this InputSection, sorted by value. With
91 // .subsections_via_symbols, there is typically only one element here.
92 llvm::TinyPtrVector<Defined *> symbols;
93
94 protected:
95 const Section §ion;
96
97 const Defined *getContainingSymbol(uint64_t off) const;
98 };
99
100 // ConcatInputSections are combined into (Concat)OutputSections through simple
101 // concatenation, in contrast with literal sections which may have their
102 // contents merged before output.
103 class ConcatInputSection final : public InputSection {
104 public:
105 ConcatInputSection(const Section §ion, ArrayRef<uint8_t> data,
106 uint32_t align = 1)
InputSection(ConcatKind,section,data,align)107 : InputSection(ConcatKind, section, data, align) {}
108
getOffset(uint64_t off)109 uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
getVA()110 uint64_t getVA() const { return InputSection::getVA(0); }
111 // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
isLive(uint64_t off)112 bool isLive(uint64_t off) const override { return live; }
markLive(uint64_t off)113 void markLive(uint64_t off) override { live = true; }
isCoalescedWeak()114 bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
shouldOmitFromOutput()115 bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
116 void writeTo(uint8_t *buf);
117
118 void foldIdentical(ConcatInputSection *redundant);
canonical()119 ConcatInputSection *canonical() override {
120 return replacement ? replacement : this;
121 }
canonical()122 const InputSection *canonical() const override {
123 return replacement ? replacement : this;
124 }
125
classof(const InputSection * isec)126 static bool classof(const InputSection *isec) {
127 return isec->kind() == ConcatKind;
128 }
129
130 // Points to the surviving section after this one is folded by ICF
131 ConcatInputSection *replacement = nullptr;
132 // Equivalence-class ID for ICF
133 uint32_t icfEqClass[2] = {0, 0};
134
135 // With subsections_via_symbols, most symbols have their own InputSection,
136 // and for weak symbols (e.g. from inline functions), only the
137 // InputSection from one translation unit will make it to the output,
138 // while all copies in other translation units are coalesced into the
139 // first and not copied to the output.
140 bool wasCoalesced = false;
141 bool live = !config->deadStrip;
142 bool hasCallSites = false;
143 // This variable has two usages. Initially, it represents the input order.
144 // After assignAddresses is called, it represents the offset from the
145 // beginning of the output section this section was assigned to.
146 uint64_t outSecOff = 0;
147 };
148
149 // Initialize a fake InputSection that does not belong to any InputFile.
150 ConcatInputSection *makeSyntheticInputSection(StringRef segName,
151 StringRef sectName,
152 uint32_t flags = 0,
153 ArrayRef<uint8_t> data = {},
154 uint32_t align = 1);
155
156 // Helper functions to make it easy to sprinkle asserts.
157
shouldOmitFromOutput(InputSection * isec)158 inline bool shouldOmitFromOutput(InputSection *isec) {
159 return isa<ConcatInputSection>(isec) &&
160 cast<ConcatInputSection>(isec)->shouldOmitFromOutput();
161 }
162
isCoalescedWeak(InputSection * isec)163 inline bool isCoalescedWeak(InputSection *isec) {
164 return isa<ConcatInputSection>(isec) &&
165 cast<ConcatInputSection>(isec)->isCoalescedWeak();
166 }
167
168 // We allocate a lot of these and binary search on them, so they should be as
169 // compact as possible. Hence the use of 31 rather than 64 bits for the hash.
170 struct StringPiece {
171 // Offset from the start of the containing input section.
172 uint32_t inSecOff;
173 uint32_t live : 1;
174 // Only set if deduplicating literals
175 uint32_t hash : 31;
176 // Offset from the start of the containing output section.
177 uint64_t outSecOff = 0;
178
StringPieceStringPiece179 StringPiece(uint64_t off, uint32_t hash)
180 : inSecOff(off), live(!config->deadStrip), hash(hash) {}
181 };
182
183 static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
184
185 // CStringInputSections are composed of multiple null-terminated string
186 // literals, which we represent using StringPieces. These literals can be
187 // deduplicated and tail-merged, so translating offsets between the input and
188 // outputs sections is more complicated.
189 //
190 // NOTE: One significant difference between LLD and ld64 is that we merge all
191 // cstring literals, even those referenced directly by non-private symbols.
192 // ld64 is more conservative and does not do that. This was mostly done for
193 // implementation simplicity; if we find programs that need the more
194 // conservative behavior we can certainly implement that.
195 class CStringInputSection final : public InputSection {
196 public:
CStringInputSection(const Section & section,ArrayRef<uint8_t> data,uint32_t align,bool dedupLiterals)197 CStringInputSection(const Section §ion, ArrayRef<uint8_t> data,
198 uint32_t align, bool dedupLiterals)
199 : InputSection(CStringLiteralKind, section, data, align),
200 deduplicateLiterals(dedupLiterals) {}
201
202 uint64_t getOffset(uint64_t off) const override;
isLive(uint64_t off)203 bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
markLive(uint64_t off)204 void markLive(uint64_t off) override { getStringPiece(off).live = true; }
205 // Find the StringPiece that contains this offset.
206 StringPiece &getStringPiece(uint64_t off);
207 const StringPiece &getStringPiece(uint64_t off) const;
208 // Split at each null byte.
209 void splitIntoPieces();
210
211 LLVM_ATTRIBUTE_ALWAYS_INLINE
getStringRef(size_t i)212 StringRef getStringRef(size_t i) const {
213 size_t begin = pieces[i].inSecOff;
214 // The endpoint should be *at* the null terminator, not after. This matches
215 // the behavior of StringRef(const char *Str).
216 size_t end =
217 ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1;
218 return toStringRef(data.slice(begin, end - begin));
219 }
220
221 // Returns i'th piece as a CachedHashStringRef. This function is very hot when
222 // string merging is enabled, so we want to inline.
223 LLVM_ATTRIBUTE_ALWAYS_INLINE
getCachedHashStringRef(size_t i)224 llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
225 assert(deduplicateLiterals);
226 return {getStringRef(i), pieces[i].hash};
227 }
228
classof(const InputSection * isec)229 static bool classof(const InputSection *isec) {
230 return isec->kind() == CStringLiteralKind;
231 }
232
233 bool deduplicateLiterals = false;
234 std::vector<StringPiece> pieces;
235 };
236
237 class WordLiteralInputSection final : public InputSection {
238 public:
239 WordLiteralInputSection(const Section §ion, ArrayRef<uint8_t> data,
240 uint32_t align);
241 uint64_t getOffset(uint64_t off) const override;
isLive(uint64_t off)242 bool isLive(uint64_t off) const override {
243 return live[off >> power2LiteralSize];
244 }
markLive(uint64_t off)245 void markLive(uint64_t off) override {
246 live[off >> power2LiteralSize] = true;
247 }
248
classof(const InputSection * isec)249 static bool classof(const InputSection *isec) {
250 return isec->kind() == WordLiteralKind;
251 }
252
253 private:
254 unsigned power2LiteralSize;
255 // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
256 llvm::BitVector live;
257 };
258
sectionType(uint32_t flags)259 inline uint8_t sectionType(uint32_t flags) {
260 return flags & llvm::MachO::SECTION_TYPE;
261 }
262
isZeroFill(uint32_t flags)263 inline bool isZeroFill(uint32_t flags) {
264 return llvm::MachO::isVirtualSection(sectionType(flags));
265 }
266
isThreadLocalVariables(uint32_t flags)267 inline bool isThreadLocalVariables(uint32_t flags) {
268 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES;
269 }
270
271 // These sections contain the data for initializing thread-local variables.
isThreadLocalData(uint32_t flags)272 inline bool isThreadLocalData(uint32_t flags) {
273 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR ||
274 sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;
275 }
276
isDebugSection(uint32_t flags)277 inline bool isDebugSection(uint32_t flags) {
278 return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==
279 llvm::MachO::S_ATTR_DEBUG;
280 }
281
isWordLiteralSection(uint32_t flags)282 inline bool isWordLiteralSection(uint32_t flags) {
283 return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||
284 sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||
285 sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;
286 }
287
288 bool isCodeSection(const InputSection *);
289 bool isCfStringSection(const InputSection *);
290 bool isClassRefsSection(const InputSection *);
291 bool isSelRefsSection(const InputSection *);
292 bool isEhFrameSection(const InputSection *);
293 bool isGccExceptTabSection(const InputSection *);
294
295 extern std::vector<ConcatInputSection *> inputSections;
296
// Section-name string constants, collected in one place so they can be
// referenced by identifier. Identifiers that would collide with C++ keywords
// carry a trailing underscore (export_, const_).
namespace section_names {

constexpr const char authGot[] = "__auth_got";
constexpr const char authPtr[] = "__auth_ptr";
constexpr const char binding[] = "__binding";
constexpr const char bitcodeBundle[] = "__bundle";
constexpr const char cString[] = "__cstring";
constexpr const char cfString[] = "__cfstring";
constexpr const char cgProfile[] = "__cg_profile";
constexpr const char chainFixups[] = "__chainfixups";
constexpr const char codeSignature[] = "__code_signature";
constexpr const char common[] = "__common";
constexpr const char compactUnwind[] = "__compact_unwind";
constexpr const char data[] = "__data";
constexpr const char debugAbbrev[] = "__debug_abbrev";
constexpr const char debugInfo[] = "__debug_info";
constexpr const char debugLine[] = "__debug_line";
constexpr const char debugStr[] = "__debug_str";
constexpr const char debugStrOffs[] = "__debug_str_offs";
constexpr const char ehFrame[] = "__eh_frame";
constexpr const char gccExceptTab[] = "__gcc_except_tab";
constexpr const char export_[] = "__export";
constexpr const char dataInCode[] = "__data_in_code";
constexpr const char functionStarts[] = "__func_starts";
constexpr const char got[] = "__got";
constexpr const char header[] = "__mach_header";
constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
constexpr const char initOffsets[] = "__init_offsets";
constexpr const char const_[] = "__const";
constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr const char lazyBinding[] = "__lazy_binding";
constexpr const char literals[] = "__literals";
constexpr const char moduleInitFunc[] = "__mod_init_func";
constexpr const char moduleTermFunc[] = "__mod_term_func";
constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr const char objcCatList[] = "__objc_catlist";
constexpr const char objcClassList[] = "__objc_classlist";
constexpr const char objcClassRefs[] = "__objc_classrefs";
constexpr const char objcConst[] = "__objc_const";
constexpr const char objCImageInfo[] = "__objc_imageinfo";
constexpr const char objcStubs[] = "__objc_stubs";
constexpr const char objcSelrefs[] = "__objc_selrefs";
constexpr const char objcMethname[] = "__objc_methname";
constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr const char objcProtoList[] = "__objc_protolist";
constexpr const char pageZero[] = "__pagezero";
constexpr const char pointers[] = "__pointers";
constexpr const char rebase[] = "__rebase";
constexpr const char staticInit[] = "__StaticInit";
constexpr const char stringTable[] = "__string_table";
constexpr const char stubHelper[] = "__stub_helper";
constexpr const char stubs[] = "__stubs";
constexpr const char swift[] = "__swift";
constexpr const char symbolTable[] = "__symbol_table";
constexpr const char textCoalNt[] = "__textcoal_nt";
constexpr const char text[] = "__text";
constexpr const char threadPtrs[] = "__thread_ptrs";
constexpr const char threadVars[] = "__thread_vars";
constexpr const char unwindInfo[] = "__unwind_info";
constexpr const char weakBinding[] = "__weak_binding";
constexpr const char zeroFill[] = "__zerofill";
constexpr const char addrSig[] = "__llvm_addrsig";

} // namespace section_names
362
363 } // namespace macho
364
365 std::string toString(const macho::InputSection *);
366
367 } // namespace lld
368
369 #endif
370