1 //===- MarkLive.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements --gc-sections, which is a feature to remove unused
10 // chunks from the output. Unused chunks are those that are not reachable from
11 // known root symbols or chunks. This feature is implemented as a mark-sweep
12 // garbage collector.
13 //
14 // Here's how it works. Each InputChunk has a "Live" bit. The bit is off by
15 // default. Starting with the GC-roots, visit all reachable chunks and set their
// Live bits. The Writer will then ignore chunks whose Live bits are off, so
// that such chunks do not appear in the output.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "MarkLive.h"
22 #include "Config.h"
23 #include "InputChunks.h"
24 #include "InputEvent.h"
25 #include "InputGlobal.h"
26 #include "SymbolTable.h"
27 #include "Symbols.h"
28 
29 #define DEBUG_TYPE "lld"
30 
31 using namespace llvm;
32 using namespace llvm::wasm;
33 
34 namespace lld {
35 namespace wasm {
36 
37 namespace {
38 
39 class MarkLive {
40 public:
41   void run();
42 
43 private:
44   void enqueue(Symbol *sym);
45   void enqueueInitFunctions(const ObjFile *sym);
46   void markSymbol(Symbol *sym);
47   void mark();
48   bool isCallCtorsLive();
49 
50   // A list of chunks to visit.
51   SmallVector<InputChunk *, 256> queue;
52 };
53 
54 } // namespace
55 
enqueue(Symbol * sym)56 void MarkLive::enqueue(Symbol *sym) {
57   if (!sym || sym->isLive())
58     return;
59   LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
60 
61   InputFile *file = sym->getFile();
62   bool needInitFunctions = file && !file->isLive() && sym->isDefined();
63 
64   sym->markLive();
65 
66   // Mark ctor functions in the object that defines this symbol live.
67   // The ctor functions are all referenced by the synthetic callCtors
68   // function. However, this function does not contain relocations so we
69   // have to manually mark the ctors as live.
70   if (needInitFunctions)
71     enqueueInitFunctions(cast<ObjFile>(file));
72 
73   if (InputChunk *chunk = sym->getChunk())
74     queue.push_back(chunk);
75 }
76 
77 // The ctor functions are all referenced by the synthetic callCtors
78 // function.  However, this function does not contain relocations so we
79 // have to manually mark the ctors as live.
enqueueInitFunctions(const ObjFile * obj)80 void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
81   const WasmLinkingData &l = obj->getWasmObj()->linkingData();
82   for (const WasmInitFunc &f : l.InitFunctions) {
83     auto *initSym = obj->getFunctionSymbol(f.Symbol);
84     if (!initSym->isDiscarded())
85       enqueue(initSym);
86   }
87 }
88 
run()89 void MarkLive::run() {
90   // Add GC root symbols.
91   if (!config->entry.empty())
92     enqueue(symtab->find(config->entry));
93 
94   // We need to preserve any no-strip or exported symbol
95   for (Symbol *sym : symtab->getSymbols())
96     if (sym->isNoStrip() || sym->isExported())
97       enqueue(sym);
98 
99   // If we'll be calling the user's `__wasm_call_dtors` function, mark it live.
100   if (Symbol *callDtors = WasmSym::callDtors)
101     enqueue(callDtors);
102 
103   // In Emscripten-style PIC, `__wasm_call_ctors` calls `__wasm_apply_relocs`.
104   if (config->isPic)
105     enqueue(WasmSym::applyRelocs);
106 
107   if (config->sharedMemory && !config->shared)
108     enqueue(WasmSym::initMemory);
109 
110   // Enqueue constructors in objects explicitly live from the command-line.
111   for (const ObjFile *obj : symtab->objectFiles)
112     if (obj->isLive())
113       enqueueInitFunctions(obj);
114 
115   mark();
116 
117   // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
118   // live so that we assign it an index and call it.
119   if (isCallCtorsLive())
120     WasmSym::callCtors->markLive();
121 }
122 
mark()123 void MarkLive::mark() {
124   // Follow relocations to mark all reachable chunks.
125   while (!queue.empty()) {
126     InputChunk *c = queue.pop_back_val();
127 
128     for (const WasmRelocation reloc : c->getRelocations()) {
129       if (reloc.Type == R_WASM_TYPE_INDEX_LEB)
130         continue;
131       Symbol *sym = c->file->getSymbol(reloc.Index);
132 
133       // If the function has been assigned the special index zero in the table,
134       // the relocation doesn't pull in the function body, since the function
135       // won't actually go in the table (the runtime will trap attempts to call
136       // that index, since we don't use it).  A function with a table index of
137       // zero is only reachable via "call", not via "call_indirect".  The stub
138       // functions used for weak-undefined symbols have this behaviour (compare
139       // equal to null pointer, only reachable via direct call).
140       if (reloc.Type == R_WASM_TABLE_INDEX_SLEB ||
141           reloc.Type == R_WASM_TABLE_INDEX_SLEB64 ||
142           reloc.Type == R_WASM_TABLE_INDEX_I32 ||
143           reloc.Type == R_WASM_TABLE_INDEX_I64) {
144         auto *funcSym = cast<FunctionSymbol>(sym);
145         if (funcSym->hasTableIndex() && funcSym->getTableIndex() == 0)
146           continue;
147       }
148 
149       enqueue(sym);
150     }
151   }
152 }
153 
markLive()154 void markLive() {
155   if (!config->gcSections)
156     return;
157 
158   LLVM_DEBUG(dbgs() << "markLive\n");
159 
160   MarkLive marker;
161   marker.run();
162 
163   // Report garbage-collected sections.
164   if (config->printGcSections) {
165     for (const ObjFile *obj : symtab->objectFiles) {
166       for (InputChunk *c : obj->functions)
167         if (!c->live)
168           message("removing unused section " + toString(c));
169       for (InputChunk *c : obj->segments)
170         if (!c->live)
171           message("removing unused section " + toString(c));
172       for (InputGlobal *g : obj->globals)
173         if (!g->live)
174           message("removing unused section " + toString(g));
175       for (InputEvent *e : obj->events)
176         if (!e->live)
177           message("removing unused section " + toString(e));
178     }
179     for (InputChunk *c : symtab->syntheticFunctions)
180       if (!c->live)
181         message("removing unused section " + toString(c));
182     for (InputGlobal *g : symtab->syntheticGlobals)
183       if (!g->live)
184         message("removing unused section " + toString(g));
185   }
186 }
187 
isCallCtorsLive()188 bool MarkLive::isCallCtorsLive() {
189   // In a reloctable link, we don't call `__wasm_call_ctors`.
190   if (config->relocatable)
191     return false;
192 
193   // In Emscripten-style PIC, we call `__wasm_call_ctors` which calls
194   // `__wasm_apply_relocs`.
195   if (config->isPic)
196     return true;
197 
198   // If there are any init functions, mark `__wasm_call_ctors` live so that
199   // it can call them.
200   for (const ObjFile *file : symtab->objectFiles) {
201     const WasmLinkingData &l = file->getWasmObj()->linkingData();
202     for (const WasmInitFunc &f : l.InitFunctions) {
203       auto *sym = file->getFunctionSymbol(f.Symbol);
204       if (!sym->isDiscarded() && sym->isLive())
205         return true;
206     }
207   }
208 
209   return false;
210 }
211 
212 } // namespace wasm
213 } // namespace lld
214