/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2013-
 *
 * Check whether dynamically-loaded object code can be safely
 * unloaded, by searching for references to it from the heap and RTS
 * data structures.
 *
 * --------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "RtsUtils.h"
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/HeapUtils.h"

//
// Note [Object unloading]
// ~~~~~~~~~~~~~~~~~~~~~~~
//
// Overview of object unloading:
//
// - In a major GC, for every static object we mark the object's object code and
//   its dependencies as 'live'. This is done by `markObjectCode`, called by
//   `evacuate`.
//
// - Marking object code is done using a global "section index table"
//   (global_s_indices below). When we load an object we add its section
//   indices to the table. `markObjectCode` does a binary search on this table
//   to find the object code for the marked object, and marks it and its
//   dependencies.
//
//   A dependency of an object code is simply another object code that it
//   refers to in its code. We discover these dependencies via the relocations
//   present in the referring object; they are recorded by lookupSymbolDependent.
//
// - global_s_indices is updated as we load and unload objects. When we load an
//   object we add its section indices to the table, and when we unload it we
//   remove those indices.
//
//   The table is sorted and old indices are removed in `checkUnload`, instead
//   of on every load/unload, to avoid quadratic behavior when we load a list
//   of objects.
//
// - After a major GC `checkUnload` unloads objects that (1) were explicitly
//   asked to be unloaded (via `unloadObj`) and (2) were not marked during GC.
//
//   Note that, crucially, we don't unload an object code even if it's not
//   reachable from the heap, unless it's explicitly asked for unloading (via
//   `unloadObj`). This is a feature and not a bug! Two use cases:
//
//   - The user might request a symbol from a loaded object at any point with
//     lookupSymbol (e.g. GHCi might do this).
//
//   - Sometimes we load objects that are not Haskell objects.
//
// To avoid unloading objects that are unreachable but are not asked for
// unloading we maintain a "root set" of object code, `loaded_objects` below.
// `loadObj` adds the loaded object (and its dependencies) to the list and
// `unloadObj` removes it. After a major GC, `checkUnload` first marks the root
// set (`loaded_objects`) to avoid unloading objects that were not asked for
// unloading. A typical client-side sequence is sketched below.
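//
//   For illustration only (a sketch using the public linker API; the exact
//   call sites live in clients such as GHCi, and "A.o" is a made-up name):
//
//       loadObj("A.o");     // A.o joins `loaded_objects` (the root set)
//       resolveObjs();
//       ... look up symbols in A.o and run its code ...
//       unloadObj("A.o");   // A.o leaves the root set; it is actually freed
//                           // by `checkUnload` after a later major GC, once
//                           // the heap no longer references it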
//
// Two other lists, `objects` and `old_objects`, are similar to the large
// object lists in the GC. Before a major GC we move `objects` to `old_objects`,
// and move marked objects back to `objects` during evacuation and when marking
// roots in `checkUnload`. Any objects remaining in `old_objects` after that are
// unloaded.
//
// TODO: We currently don't unload objects when non-moving GC is enabled. The
// implementation would be similar to `nonmovingGcCafs`:
//
// - Maintain a "snapshot":
//
//   - Copy `loaded_objects` as the root set of the snapshot
//
//   - Stash `objects` to `old_objects` as the snapshot. We don't need a new
//     list for this as `old_objects` won't be used by any other code when
//     non-moving GC is enabled.
//
//   - Copy `global_s_indices` table to be able to mark objects while mutators
//     call `loadObj_` and `unloadObj_` concurrently.
//
//   - Don't mark object code in `evacuate`; marking will be done in the
//     non-moving collector.
//
// - After preparation, bump the object code mark bit (`object_code_mark_bit`
//   below) and mark static objects using a version of `markObjectCode` that
//   basically does the same thing but:
//
//   - Needs to update the `objects` list in a thread-safe way, as mutators
//     will be concurrently calling `loadObj_` and adding new objects to
//     `objects`. (Alternatively, we could keep a separate list for the
//     non-moving GC's objects and merge it into the global list in the pause
//     before moving to the concurrent sweep phase.)
//
//   - Needs to use the copied `global_s_indices`
//
// - After marking, anything left in `old_objects` is unreachable within the
//   snapshot; unload those objects. The unload loop will be the same as in
//   `checkUnload`. This step needs to happen in the final sync (before sweep
//   begins) to avoid races when updating `global_s_indices`.
//
// - NOTE: We don't need write barriers in loadObj/unloadObj as we don't
//   introduce a dependency from an already-loaded object to a newly loaded
//   object and we don't delete existing dependencies.
//

uint8_t object_code_mark_bit = 0;

typedef struct {
    W_ start;
    W_ end;
    ObjectCode *oc;
} OCSectionIndex;

typedef struct {
    int capacity; // Doubled on resize
    int n_sections;
    bool sorted;   // Invalidated on insertion. Sorted again in
                   // `sortOCSectionIndices` (called from `prepareUnloadCheck`).
    bool unloaded; // Whether we removed anything from the table in
                   // removeOCSectionIndices. If this is set we "compact" the
                   // table (remove the unused entries) in
                   // `removeRemovedOCSections`.
    OCSectionIndex *indices;
} OCSectionIndices;

// List of currently live objects. Moved to `old_objects` before the unload
// check. Marked objects are moved back to this list in `markObjectLive`.
// Remaining objects are freed at the end of `checkUnload`.
//
// Doubly-linked list, to be able to remove marked objects. The list is formed
// with the `next` and `prev` fields of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *objects = NULL;

// `objects` list is moved here before the unload check. Marked objects are
// moved back to `objects`. Remaining objects are freed.
static ObjectCode *old_objects = NULL;

// Number of objects that we want to unload. When this value is 0 we skip static
// object marking during GC and `checkUnload`.
//
// Not static: we use this value to skip static object marking in evacuate when
// this is 0.
//
// Incremented in `unloadObj_`, decremented as we unload objects in
// `checkUnload`.
int n_unloaded_objects = 0;

// List of objects that we don't want to unload (i.e. we haven't called
// unloadObj on these yet). Used as root set for unload check in checkUnload.
// Objects are added with loadObj_ and removed with unloadObj_.
//
// List formed with `next_loaded_object` field of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *loaded_objects;

// Section index table for currently loaded objects. New indices are added by
// `loadObj_`, indices of unloaded objects are removed in `checkUnload`. Used to
// map static closures to their ObjectCode.
static OCSectionIndices *global_s_indices = NULL;

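// Allocate an empty section index table with an initial capacity of 1024
// entries.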
static OCSectionIndices *createOCSectionIndices(void)
{
    // TODO (osa): Maybe initialize as empty (without allocation) and allocate
    // on first insertion?
    OCSectionIndices *s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
    int capacity = 1024;
    s_indices->capacity = capacity;
    s_indices->n_sections = 0;
    s_indices->sorted = true;
    s_indices->unloaded = false;
    s_indices->indices = stgMallocBytes(capacity * sizeof(OCSectionIndex),
                                        "OCSectionIndices::indices");
    return s_indices;
}

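// Free a section index table and its index array.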
static void freeOCSectionIndices(OCSectionIndices *s_indices)
{
    free(s_indices->indices);
    free(s_indices);
}

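// Create the global section index table used by the unload check.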
void initUnloadCheck()
{
    global_s_indices = createOCSectionIndices();
}

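// Tear down the global section index table; the unload check becomes a no-op
// afterwards (see the NULL checks below).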
void exitUnloadCheck()
{
    freeOCSectionIndices(global_s_indices);
    global_s_indices = NULL;
}

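// qsort comparator: orders section index entries by section start address.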
static int cmpSectionIndex(const void *indexa, const void *indexb)
{
    W_ s1 = ((OCSectionIndex*)indexa)->start;
    W_ s2 = ((OCSectionIndex*)indexb)->start;
    if (s1 < s2) {
        return -1;
    } else if (s1 > s2) {
        return 1;
    }
    return 0;
}

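// Make sure the table has room for at least `len` more entries, growing the
// index array to the next power of two if necessary.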
static void reserveOCSectionIndices(OCSectionIndices *s_indices, int len)
{
    int current_capacity = s_indices->capacity;
    int current_len = s_indices->n_sections;
    if (current_capacity - current_len >= len) {
        return;
    }

    // Round up to nearest power of 2
    int new_capacity = 1 << (int)ceil(log2(current_len + len));

    OCSectionIndex *old_indices = s_indices->indices;
    OCSectionIndex *new_indices = stgMallocBytes(new_capacity * sizeof(OCSectionIndex),
                                                 "reserveOCSectionIndices");

    for (int i = 0; i < current_len; ++i) {
        new_indices[i] = old_indices[i];
    }

    s_indices->capacity = new_capacity;
    s_indices->indices = new_indices;

    free(old_indices);
}

// Insert object section indices of a single ObjectCode. Invalidates 'sorted'
// state.
void insertOCSectionIndices(ObjectCode *oc)
{
    reserveOCSectionIndices(global_s_indices, oc->n_sections);
    global_s_indices->sorted = false;

    int s_i = global_s_indices->n_sections;
    for (int i = 0; i < oc->n_sections; i++) {
        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
            global_s_indices->indices[s_i].start = (W_)oc->sections[i].start;
            global_s_indices->indices[s_i].end = (W_)oc->sections[i].start
                + oc->sections[i].size;
            global_s_indices->indices[s_i].oc = oc;
            s_i++;
        }
    }

    global_s_indices->n_sections = s_i;

    // Add object to 'objects' list
    if (objects != NULL) {
        objects->prev = oc;
    }
    oc->next = objects;
    objects = oc;
}

static int findSectionIdx(OCSectionIndices *s_indices, const void *addr);

static void removeOCSectionIndices(OCSectionIndices *s_indices, ObjectCode *oc)
{
    // To avoid quadratic behavior in checkUnload we set the `oc` fields of the
    // indices of unloaded objects to NULL here. The unused entries are removed
    // in `removeRemovedOCSections`, called from `prepareUnloadCheck`.

    s_indices->unloaded = true;

    for (int i = 0; i < oc->n_sections; i++) {
        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
            int section_idx = findSectionIdx(s_indices, oc->sections[i].start);
            if (section_idx != -1) {
                s_indices->indices[section_idx].oc = NULL;
            }
        }
    }
}

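// Sort the table by section start address (if it isn't already sorted) so
// that findSectionIdx can binary-search it.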
static void sortOCSectionIndices(OCSectionIndices *s_indices) {
    if (s_indices->sorted) {
        return;
    }

    qsort(s_indices->indices,
          s_indices->n_sections,
          sizeof(OCSectionIndex),
          cmpSectionIndex);

    s_indices->sorted = true;
}

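// Compact the table by dropping entries whose `oc` field was cleared in
// removeOCSectionIndices. Called from prepareUnloadCheck, before sorting.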
static void removeRemovedOCSections(OCSectionIndices *s_indices) {
    if (!s_indices->unloaded) {
        return;
    }

    int next_free_idx = 0;
    for (int i = 0; i < s_indices->n_sections; ++i) {
        if (s_indices->indices[i].oc == NULL) {
            // free entry, skip
        } else if (i == next_free_idx) {
            ++next_free_idx;
        } else {
            s_indices->indices[next_free_idx] = s_indices->indices[i];
            ++next_free_idx;
        }
    }

    s_indices->n_sections = next_free_idx;
    s_indices->unloaded = false; // the table no longer contains removed entries
}

// Binary search for the section that contains `addr`. The table must be
// sorted. Returns the index of the matching section, or -1 if not found.
static int findSectionIdx(OCSectionIndices *s_indices, const void *addr) {
    ASSERT(s_indices->sorted);

    W_ w_addr = (W_)addr;
    if (s_indices->n_sections <= 0) {
        return -1;
    }
    if (w_addr < s_indices->indices[0].start) {
        return -1;
    }

    int left = 0, right = s_indices->n_sections;
    while (left + 1 < right) {
        int mid = (left + right)/2;
        W_ w_mid = s_indices->indices[mid].start;
        if (w_mid <= w_addr) {
            left = mid;
        } else {
            right = mid;
        }
    }
    ASSERT(w_addr >= s_indices->indices[left].start);
    if (w_addr < s_indices->indices[left].end) {
        return left;
    }
    return -1;
}

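// Map an address to the ObjectCode whose section contains it, or NULL if the
// address is not covered by any entry in the table.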
static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
    int oc_idx = findSectionIdx(s_indices, addr);

    if (oc_idx == -1) {
        return NULL;
    }

    return s_indices->indices[oc_idx].oc;
}

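// Mark the ObjectCode passed as `key` as live: set its mark bit, move it from
// `old_objects` back to `objects`, and recursively mark its dependencies. The
// signature matches an iterHashTable callback so the function can also be used
// to walk the `dependencies` table.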
static bool markObjectLive(void *data STG_UNUSED, StgWord key, const void *value STG_UNUSED) {
    ObjectCode *oc = (ObjectCode*)key;

    // N.B. we may be called by the parallel GC and therefore this must be
    // thread-safe. To avoid taking the linker_mutex in the fast path
    // (when the object is already marked) we do an atomic exchange here and
    // only take the lock in the case that the object is unmarked.
    if (xchg(&oc->mark, object_code_mark_bit) == object_code_mark_bit) {
        return true; // for hash table iteration
    }

    ACQUIRE_LOCK(&linker_mutex);
    // Remove from 'old_objects' list
    if (oc->prev != NULL) {
        // TODO(osa): Maybe 'prev' should be a pointer to the referencing
        // *field* ? (instead of referencing *object*)
        oc->prev->next = oc->next;
    } else {
        old_objects = oc->next;
    }
    if (oc->next != NULL) {
        oc->next->prev = oc->prev;
    }

    // Add it to 'objects' list
    oc->prev = NULL;
    oc->next = objects;
    if (objects != NULL) {
        objects->prev = oc;
    }
    objects = oc;
    RELEASE_LOCK(&linker_mutex);

    // Mark its dependencies
    iterHashTable(oc->dependencies, NULL, markObjectLive);

    return true; // for hash table iteration
}

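// Mark the object code containing the static closure at `addr` (and its
// dependencies) as live. Called by the GC when it encounters a static object;
// see Note [Object unloading] above.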
void markObjectCode(const void *addr)
{
    if (global_s_indices == NULL) {
        return;
    }

    // This should be checked at the call site
    ASSERT(!HEAP_ALLOCED(addr));

    ObjectCode *oc = findOC(global_s_indices, addr);
    if (oc != NULL) {
        // Mark the object code and its dependencies
        markObjectLive(NULL, (W_)oc, NULL);
    }
}

// Returns whether or not the GC that follows needs to mark code for potential
// unloading.
bool prepareUnloadCheck()
{
    if (global_s_indices == NULL) {
        return false;
    }

    removeRemovedOCSections(global_s_indices);
    sortOCSectionIndices(global_s_indices);

    ASSERT(old_objects == NULL);

    object_code_mark_bit = ~object_code_mark_bit;
    old_objects = objects;
    objects = NULL;
    return true;
}

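// Unload the objects that were requested via `unloadObj` and were not marked
// during the preceding major GC: mark the root set (`loaded_objects`) first,
// then free everything still left on `old_objects`.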
void checkUnload()
{
    if (global_s_indices == NULL) {
        return;
    }

    // At this point we've marked all dynamically loaded static objects
    // (including their dependencies) during GC, but not the root set of object
    // code (loaded_objects). Mark the roots first, then unload any unmarked
    // objects.

    OCSectionIndices *s_indices = global_s_indices;
    ASSERT(s_indices->sorted);

    // Mark roots
    for (ObjectCode *oc = loaded_objects; oc != NULL; oc = oc->next_loaded_object) {
        markObjectLive(NULL, (W_)oc, NULL);
    }

    // Free unmarked objects
    ObjectCode *next = NULL;
    for (ObjectCode *oc = old_objects; oc != NULL; oc = next) {
        next = oc->next;

        removeOCSectionIndices(s_indices, oc);

        // Symbols should be removed by unloadObj_.
        // NB (osa): If this assertion doesn't hold then freeObjectCode below
        // will corrupt symhash as keys of that table live in ObjectCodes. If
        // you see a segfault in a hash table operation in the linker (in a
        // non-debug RTS) then it's probably because this assertion did not
        // hold.
        ASSERT(oc->symbols == NULL);

        freeObjectCode(oc);
        n_unloaded_objects -= 1;
    }

    old_objects = NULL;
}
