1 //===- MarkLive.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MarkLive.h"
10 #include "Config.h"
11 #include "OutputSegment.h"
12 #include "SymbolTable.h"
13 #include "Symbols.h"
14 #include "UnwindInfoSection.h"
15 
16 #include "lld/Common/ErrorHandler.h"
17 #include "llvm/Support/TimeProfiler.h"
18 
19 #include "mach-o/compact_unwind_encoding.h"
20 
21 namespace lld::macho {
22 
23 using namespace llvm;
24 using namespace llvm::MachO;
25 
26 struct WhyLiveEntry {
27   InputSection *isec;
28   // Keep track of the entry that caused us to mark `isec` as live.
29   const WhyLiveEntry *prev;
30 
31   WhyLiveEntry(InputSection *isec, const WhyLiveEntry *prev)
32       : isec(isec), prev(prev) {}
33 };
34 
35 // Type-erased interface to MarkLiveImpl. Used for adding roots to the liveness
36 // graph.
37 class MarkLive {
38 public:
39   virtual void enqueue(InputSection *isec, uint64_t off) = 0;
40   virtual void addSym(Symbol *s) = 0;
41   virtual void markTransitively() = 0;
42   virtual ~MarkLive() = default;
43 };
44 
45 template <bool RecordWhyLive> class MarkLiveImpl : public MarkLive {
46 public:
47   // -why_live is a rarely used option, so we don't want support for that flag
48   // to slow down the main -dead_strip code path. As such, we employ templates
49   // to avoid the usage of WhyLiveEntry in the main code path. This saves us
50   // from needless allocations and pointer indirections.
51   using WorklistEntry =
52       std::conditional_t<RecordWhyLive, WhyLiveEntry, InputSection>;
53 
54   void enqueue(InputSection *isec, uint64_t off) override {
55     enqueue(isec, off, nullptr);
56   }
57   void addSym(Symbol *s) override { addSym(s, nullptr); }
58   void markTransitively() override;
59 
60 private:
61   void enqueue(InputSection *isec, uint64_t off, const WorklistEntry *prev);
62   void addSym(Symbol *s, const WorklistEntry *prev);
63   const InputSection *getInputSection(const WorklistEntry *) const;
64   WorklistEntry *makeEntry(InputSection *, const WorklistEntry *prev) const;
65 
66   // We build up a worklist of sections which have been marked as live. We
67   // only push into the worklist when we discover an unmarked section, and we
68   // mark as we push, so sections never appear twice in the list. Literal
69   // sections cannot contain references to other sections, so we only store
70   // ConcatInputSections in our worklist.
71   SmallVector<WorklistEntry *, 256> worklist;
72 };
73 
74 template <bool RecordWhyLive>
75 void MarkLiveImpl<RecordWhyLive>::enqueue(
76     InputSection *isec, uint64_t off,
77     const typename MarkLiveImpl<RecordWhyLive>::WorklistEntry *prev) {
78   if (isec->isLive(off))
79     return;
80   isec->markLive(off);
81   if (auto s = dyn_cast<ConcatInputSection>(isec)) {
82     assert(!s->isCoalescedWeak());
83     worklist.push_back(makeEntry(s, prev));
84   }
85 }
86 
87 static void printWhyLive(const Symbol *s, const WhyLiveEntry *prev) {
88   std::string out = toString(*s) + " from " + toString(s->getFile());
89   int indent = 2;
90   for (const WhyLiveEntry *entry = prev; entry;
91        entry = entry->prev, indent += 2) {
92     const TinyPtrVector<Defined *> &symbols = entry->isec->symbols;
93     // With .subsections_with_symbols set, most isecs will have exactly one
94     // entry in their symbols vector, so we just print the first one.
95     if (!symbols.empty())
96       out += "\n" + std::string(indent, ' ') + toString(*symbols.front()) +
97              " from " + toString(symbols.front()->getFile());
98   }
99   message(out);
100 }
101 
102 template <bool RecordWhyLive>
103 void MarkLiveImpl<RecordWhyLive>::addSym(
104     Symbol *s,
105     const typename MarkLiveImpl<RecordWhyLive>::WorklistEntry *prev) {
106   if (s->used)
107     return;
108   s->used = true;
109   if constexpr (RecordWhyLive)
110     if (!config->whyLive.empty() && config->whyLive.match(s->getName()))
111       printWhyLive(s, prev);
112   if (auto *d = dyn_cast<Defined>(s)) {
113     if (d->isec)
114       enqueue(d->isec, d->value, prev);
115     if (d->unwindEntry)
116       enqueue(d->unwindEntry, 0, prev);
117   }
118 }
119 
120 template <bool RecordWhyLive>
121 const InputSection *MarkLiveImpl<RecordWhyLive>::getInputSection(
122     const MarkLiveImpl<RecordWhyLive>::WorklistEntry *entry) const {
123   if constexpr (RecordWhyLive)
124     return entry->isec;
125   else
126     return entry;
127 }
128 
129 template <bool RecordWhyLive>
130 typename MarkLiveImpl<RecordWhyLive>::WorklistEntry *
131 MarkLiveImpl<RecordWhyLive>::makeEntry(
132     InputSection *isec,
133     const MarkLiveImpl<RecordWhyLive>::WorklistEntry *prev) const {
134   if constexpr (RecordWhyLive) {
135     if (!isec) {
136       assert(!prev);
137       return nullptr;
138     }
139     return make<WhyLiveEntry>(isec, prev);
140   } else {
141     return isec;
142   }
143 }
144 
145 template <bool RecordWhyLive>
146 void MarkLiveImpl<RecordWhyLive>::markTransitively() {
147   do {
148     // Mark things reachable from GC roots as live.
149     while (!worklist.empty()) {
150       WorklistEntry *entry = worklist.pop_back_val();
151       // Entries that get placed onto the worklist always contain
152       // ConcatInputSections. `WhyLiveEntry::prev` may point to entries that
153       // contain other types of InputSections (due to S_ATTR_LIVE_SUPPORT), but
154       // those entries should never be pushed onto the worklist.
155       auto *isec = cast<ConcatInputSection>(getInputSection(entry));
156       assert(isec->live && "We mark as live when pushing onto the worklist!");
157 
158       // Mark all symbols listed in the relocation table for this section.
159       for (const Reloc &r : isec->relocs) {
160         if (auto *s = r.referent.dyn_cast<Symbol *>())
161           addSym(s, entry);
162         else
163           enqueue(r.referent.get<InputSection *>(), r.addend, entry);
164       }
165       for (Defined *d : getInputSection(entry)->symbols)
166         addSym(d, entry);
167     }
168 
169     // S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live
170     // section. Process them in a second pass.
171     for (ConcatInputSection *isec : inputSections) {
172       // FIXME: Check if copying all S_ATTR_LIVE_SUPPORT sections into a
173       // separate vector and only walking that here is faster.
174       if (!(isec->getFlags() & S_ATTR_LIVE_SUPPORT) || isec->live)
175         continue;
176 
177       for (const Reloc &r : isec->relocs) {
178         if (auto *s = r.referent.dyn_cast<Symbol *>()) {
179           if (s->isLive()) {
180             InputSection *referentIsec = nullptr;
181             if (auto *d = dyn_cast<Defined>(s))
182               referentIsec = d->isec;
183             enqueue(isec, 0, makeEntry(referentIsec, nullptr));
184           }
185         } else {
186           auto *referentIsec = r.referent.get<InputSection *>();
187           if (referentIsec->isLive(r.addend))
188             enqueue(isec, 0, makeEntry(referentIsec, nullptr));
189         }
190       }
191     }
192 
193     // S_ATTR_LIVE_SUPPORT could have marked additional sections live,
194     // which in turn could mark additional S_ATTR_LIVE_SUPPORT sections live.
195     // Iterate. In practice, the second iteration won't mark additional
196     // S_ATTR_LIVE_SUPPORT sections live.
197   } while (!worklist.empty());
198 }
199 
200 // Set live bit on for each reachable chunk. Unmarked (unreachable)
201 // InputSections will be ignored by Writer, so they will be excluded
202 // from the final output.
203 void markLive() {
204   TimeTraceScope timeScope("markLive");
205   MarkLive *marker;
206   if (config->whyLive.empty())
207     marker = make<MarkLiveImpl<false>>();
208   else
209     marker = make<MarkLiveImpl<true>>();
210   // Add GC roots.
211   if (config->entry)
212     marker->addSym(config->entry);
213   for (Symbol *sym : symtab->getSymbols()) {
214     if (auto *defined = dyn_cast<Defined>(sym)) {
215       // -exported_symbol(s_list)
216       if (!config->exportedSymbols.empty() &&
217           config->exportedSymbols.match(defined->getName())) {
218         // NOTE: Even though exporting private externs is an ill-defined
219         // operation, we are purposely not checking for privateExtern in
220         // order to follow ld64's behavior of treating all exported private
221         // extern symbols as live, irrespective of whether they are autohide.
222         marker->addSym(defined);
223         continue;
224       }
225 
226       // public symbols explicitly marked .no_dead_strip
227       if (defined->referencedDynamically || defined->noDeadStrip) {
228         marker->addSym(defined);
229         continue;
230       }
231 
232       // FIXME: When we implement these flags, make symbols from them GC
233       // roots:
234       // * -reexported_symbol(s_list)
235       // * -alias_list
236       // * -init
237 
238       // In dylibs and bundles and in executables with -export_dynamic,
239       // all external functions are GC roots.
240       bool externsAreRoots =
241           config->outputType != MH_EXECUTE || config->exportDynamic;
242       if (externsAreRoots && !defined->privateExtern) {
243         marker->addSym(defined);
244         continue;
245       }
246     }
247   }
248   // -u symbols
249   for (Symbol *sym : config->explicitUndefineds)
250     marker->addSym(sym);
251   // local symbols explicitly marked .no_dead_strip
252   for (const InputFile *file : inputFiles)
253     if (auto *objFile = dyn_cast<ObjFile>(file))
254       for (Symbol *sym : objFile->symbols)
255         if (auto *defined = dyn_cast_or_null<Defined>(sym))
256           if (!defined->isExternal() && defined->noDeadStrip)
257             marker->addSym(defined);
258   if (auto *stubBinder =
259           dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")))
260     marker->addSym(stubBinder);
261   for (ConcatInputSection *isec : inputSections) {
262     // Sections marked no_dead_strip
263     if (isec->getFlags() & S_ATTR_NO_DEAD_STRIP) {
264       marker->enqueue(isec, 0);
265       continue;
266     }
267 
268     // mod_init_funcs, mod_term_funcs sections
269     if (sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS ||
270         sectionType(isec->getFlags()) == S_MOD_TERM_FUNC_POINTERS) {
271       assert(!config->emitInitOffsets ||
272              sectionType(isec->getFlags()) != S_MOD_INIT_FUNC_POINTERS);
273       marker->enqueue(isec, 0);
274       continue;
275     }
276   }
277 
278   for (ConcatInputSection *isec : in.initOffsets->inputs())
279     marker->enqueue(isec, 0);
280 
281   marker->markTransitively();
282 }
283 
284 } // namespace lld::macho
285