1 //===- MarkLive.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements --gc-sections, which is a feature to remove unused
10 // chunks from the output. Unused chunks are those that are not reachable from
11 // known root symbols or chunks. This feature is implemented as a mark-sweep
12 // garbage collector.
13 //
14 // Here's how it works. Each InputChunk has a "Live" bit. The bit is off by
15 // default. Starting with the GC-roots, visit all reachable chunks and set their
16 // Live bits. The Writer will then ignore chunks whose Live bits are off, so
// that such chunks do not appear in the output.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "MarkLive.h"
22 #include "Config.h"
23 #include "InputChunks.h"
24 #include "InputElement.h"
25 #include "SymbolTable.h"
26 #include "Symbols.h"
27 
28 #define DEBUG_TYPE "lld"
29 
30 using namespace llvm;
31 using namespace llvm::wasm;
32 
33 namespace lld {
34 namespace wasm {
35 
36 namespace {
37 
38 class MarkLive {
39 public:
40   void run();
41 
42 private:
43   void enqueue(Symbol *sym);
44   void enqueueInitFunctions(const ObjFile *sym);
45   void mark();
46   bool isCallCtorsLive();
47 
48   // A list of chunks to visit.
49   SmallVector<InputChunk *, 256> queue;
50 };
51 
52 } // namespace
53 
enqueue(Symbol * sym)54 void MarkLive::enqueue(Symbol *sym) {
55   if (!sym || sym->isLive())
56     return;
57   LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
58 
59   InputFile *file = sym->getFile();
60   bool needInitFunctions = file && !file->isLive() && sym->isDefined();
61 
62   sym->markLive();
63 
64   // Mark ctor functions in the object that defines this symbol live.
65   // The ctor functions are all referenced by the synthetic callCtors
66   // function. However, this function does not contain relocations so we
67   // have to manually mark the ctors as live.
68   if (needInitFunctions)
69     enqueueInitFunctions(cast<ObjFile>(file));
70 
71   if (InputChunk *chunk = sym->getChunk())
72     queue.push_back(chunk);
73 }
74 
75 // The ctor functions are all referenced by the synthetic callCtors
76 // function.  However, this function does not contain relocations so we
77 // have to manually mark the ctors as live.
enqueueInitFunctions(const ObjFile * obj)78 void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
79   const WasmLinkingData &l = obj->getWasmObj()->linkingData();
80   for (const WasmInitFunc &f : l.InitFunctions) {
81     auto *initSym = obj->getFunctionSymbol(f.Symbol);
82     if (!initSym->isDiscarded())
83       enqueue(initSym);
84   }
85 }
86 
run()87 void MarkLive::run() {
88   // Add GC root symbols.
89   if (!config->entry.empty())
90     enqueue(symtab->find(config->entry));
91 
92   // We need to preserve any no-strip or exported symbol
93   for (Symbol *sym : symtab->getSymbols())
94     if (sym->isNoStrip() || sym->isExported())
95       enqueue(sym);
96 
97   if (WasmSym::callDtors)
98     enqueue(WasmSym::callDtors);
99 
100   // Enqueue constructors in objects explicitly live from the command-line.
101   for (const ObjFile *obj : symtab->objectFiles)
102     if (obj->isLive())
103       enqueueInitFunctions(obj);
104 
105   mark();
106 
107   // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
108   // live so that we assign it an index and call it.
109   if (isCallCtorsLive())
110     WasmSym::callCtors->markLive();
111 }
112 
mark()113 void MarkLive::mark() {
114   // Follow relocations to mark all reachable chunks.
115   while (!queue.empty()) {
116     InputChunk *c = queue.pop_back_val();
117 
118     for (const WasmRelocation reloc : c->getRelocations()) {
119       if (reloc.Type == R_WASM_TYPE_INDEX_LEB)
120         continue;
121       Symbol *sym = c->file->getSymbol(reloc.Index);
122 
123       // If the function has been assigned the special index zero in the table,
124       // the relocation doesn't pull in the function body, since the function
125       // won't actually go in the table (the runtime will trap attempts to call
126       // that index, since we don't use it).  A function with a table index of
127       // zero is only reachable via "call", not via "call_indirect".  The stub
128       // functions used for weak-undefined symbols have this behaviour (compare
129       // equal to null pointer, only reachable via direct call).
130       if (reloc.Type == R_WASM_TABLE_INDEX_SLEB ||
131           reloc.Type == R_WASM_TABLE_INDEX_SLEB64 ||
132           reloc.Type == R_WASM_TABLE_INDEX_I32 ||
133           reloc.Type == R_WASM_TABLE_INDEX_I64) {
134         auto *funcSym = cast<FunctionSymbol>(sym);
135         if (funcSym->isStub)
136           continue;
137       }
138 
139       enqueue(sym);
140     }
141   }
142 }
143 
markLive()144 void markLive() {
145   if (!config->gcSections)
146     return;
147 
148   LLVM_DEBUG(dbgs() << "markLive\n");
149 
150   MarkLive marker;
151   marker.run();
152 
153   // Report garbage-collected sections.
154   if (config->printGcSections) {
155     for (const ObjFile *obj : symtab->objectFiles) {
156       for (InputChunk *c : obj->functions)
157         if (!c->live)
158           message("removing unused section " + toString(c));
159       for (InputChunk *c : obj->segments)
160         if (!c->live)
161           message("removing unused section " + toString(c));
162       for (InputGlobal *g : obj->globals)
163         if (!g->live)
164           message("removing unused section " + toString(g));
165       for (InputTag *t : obj->tags)
166         if (!t->live)
167           message("removing unused section " + toString(t));
168       for (InputTable *t : obj->tables)
169         if (!t->live)
170           message("removing unused section " + toString(t));
171     }
172     for (InputChunk *c : symtab->syntheticFunctions)
173       if (!c->live)
174         message("removing unused section " + toString(c));
175     for (InputGlobal *g : symtab->syntheticGlobals)
176       if (!g->live)
177         message("removing unused section " + toString(g));
178     for (InputTable *t : symtab->syntheticTables)
179       if (!t->live)
180         message("removing unused section " + toString(t));
181   }
182 }
183 
isCallCtorsLive()184 bool MarkLive::isCallCtorsLive() {
185   // In a reloctable link, we don't call `__wasm_call_ctors`.
186   if (config->relocatable)
187     return false;
188 
189   // In Emscripten-style PIC, we call `__wasm_call_ctors` which calls
190   // `__wasm_apply_data_relocs`.
191   if (config->isPic)
192     return true;
193 
194   // If there are any init functions, mark `__wasm_call_ctors` live so that
195   // it can call them.
196   for (const ObjFile *file : symtab->objectFiles) {
197     const WasmLinkingData &l = file->getWasmObj()->linkingData();
198     for (const WasmInitFunc &f : l.InitFunctions) {
199       auto *sym = file->getFunctionSymbol(f.Symbol);
200       if (!sym->isDiscarded() && sym->isLive())
201         return true;
202     }
203   }
204 
205   return false;
206 }
207 
208 } // namespace wasm
209 } // namespace lld
210