/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2013-
 *
 * Check whether dynamically-loaded object code can be safely
 * unloaded, by searching for references to it from the heap and RTS
 * data structures.
 *
 * --------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "RtsUtils.h"
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/HeapUtils.h"

#include <math.h> // for ceil() and log2() in reserveOCSectionIndices

//
// Note [Object unloading]
// ~~~~~~~~~~~~~~~~~~~~~~~
//
// Overview of object unloading (a sketch of the full lifecycle, as seen by a
// client of the linker, follows at the end of this note):
//
// - In a major GC, for every static object we mark the object's object code and
//   its dependencies as 'live'. This is done by `markObjectCode`, called by
//   `evacuate`.
//
// - Marking object code is done using a global "section index table"
//   (global_s_indices below). When we load an object code we add its section
//   indices to the table. `markObjectCode` does a binary search on this table
//   to find the object code for the marked object, and marks it and its
//   dependencies.
//
//   A dependency of an object code is simply any other object code that it
//   refers to in its code. We know these dependencies from the relocations
//   present in the referring object. This is recorded by lookupSymbolDependent.
//
// - global_s_indices is updated as we load and unload objects. When we load an
//   object code we add its section indices to the table; we remove those
//   indices when we unload.
//
//   The table is sorted and stale indices are removed in `prepareUnloadCheck`,
//   instead of on every load/unload, to avoid quadratic behavior when we load
//   a list of objects.
//
// - After a major GC, `checkUnload` unloads objects that (1) were explicitly
//   asked to be unloaded (via `unloadObj`) and (2) were not marked during GC.
//
// Note that, crucially, we don't unload an object code even if it's not
// reachable from the heap, unless it's explicitly asked for unloading (via
// `unloadObj`). This is a feature and not a bug! Two use cases:
//
// - The user might request a symbol from a loaded object at any point with
//   lookupSymbol (e.g. GHCi might do this).
//
// - Sometimes we load objects that are not Haskell objects.
//
// To avoid unloading objects that are unreachable but have not been asked to
// be unloaded we maintain a "root set" of object code, `loaded_objects` below.
// `loadObj` adds the loaded object (and its dependencies) to the list;
// `unloadObj` removes it. After a major GC, `checkUnload` first marks the root
// set (`loaded_objects`) to avoid unloading objects that were not asked to be
// unloaded.
//
// Two other lists, `objects` and `old_objects`, are similar to the large
// object lists in GC. Before a major GC we move `objects` to `old_objects`,
// and move marked objects back to `objects` during evacuation and when marking
// roots in `checkUnload`. Any object still in `old_objects` after that is
// unloaded.
//
// TODO: We currently don't unload objects when non-moving GC is enabled. The
// implementation would be similar to `nonmovingGcCafs`:
//
// - Maintain a "snapshot":
//
//   - Copy `loaded_objects` as the root set of the snapshot
//
//   - Stash `objects` to `old_objects` as the snapshot. We don't need a new
//     list for this as `old_objects` won't be used by any other code when
//     non-moving GC is enabled.
//
//   - Copy the `global_s_indices` table to be able to mark objects while
//     mutators call `loadObj_` and `unloadObj_` concurrently.
//
// - Don't mark object code in `evacuate`; marking will be done in the
//   non-moving collector.
//
// - After preparation, bump the object code mark bit (`object_code_mark_bit`
//   below) and mark static objects using a version of `markObjectCode` that
//   basically does the same thing but:
//
//   - Needs to update the `objects` list in a thread-safe way, as mutators
//     will be concurrently calling `loadObj_` and adding new entries to
//     `objects`. (Alternatively we could have a new list for the non-moving
//     GC's objects, and then merge it into the global list in the pause
//     before moving to the concurrent sweep phase.)
//
//   - Needs to use the copied `global_s_indices`
//
// - After marking, anything left in `old_objects` is unreachable within the
//   snapshot; unload those objects. The unload loop will be the same as in
//   `checkUnload`. This step needs to happen in the final sync (before sweep
//   begins) to avoid races when updating `global_s_indices`.
//
// - NOTE: We don't need write barriers in loadObj/unloadObj as we don't
//   introduce a dependency from an already-loaded object to a newly loaded
//   object and we don't delete existing dependencies.
//
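// As an illustration, here is the client-side lifecycle described above as a
// minimal sketch. `loadObj`, `resolveObjs`, `lookupSymbol` and `unloadObj`
// are the public linker entry points; the object path and symbol name are
// hypothetical:
//
//     loadObj("Foo.o");                 // Foo.o joins `loaded_objects`
//     resolveObjs();
//     void *f = lookupSymbol("foo_f");  // safe: Foo.o is in the root set
//     unloadObj("Foo.o");               // Foo.o leaves the root set
//     // After the next major GC, checkUnload() frees Foo.o, unless one of
//     // its static closures was still reachable from the heap.
//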

uint8_t object_code_mark_bit = 0;

typedef struct {
    W_ start;
    W_ end;
    ObjectCode *oc;
} OCSectionIndex;

typedef struct {
    int capacity; // Grown to the next power of two on resize
    int n_sections;
    bool sorted; // Invalidated on insertion. Sorted again in
                 // prepareUnloadCheck, before the unload check.
    bool unloaded; // Whether we removed anything from the table in
                   // removeOCSectionIndices. If this is set we "compact" the
                   // table (remove unused entries) in `removeRemovedOCSections`.
    OCSectionIndex *indices;
} OCSectionIndices;
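
// For example (with hypothetical addresses), after sorting the table might
// look like:
//
//     start   end     oc
//     0x1000  0x1800  A     <- A's text section
//     0x1800  0x1a00  A     <- A's data section
//     0x4000  0x5000  B
//
// Entries are non-overlapping [start,end) ranges, so once the table is sorted
// by `start` an address can be mapped to its ObjectCode with a binary search
// (see findSectionIdx below).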

// List of currently live objects. Moved to `old_objects` before the unload
// check. Marked objects are moved back to this list in `markObjectLive`.
// Remaining objects are freed at the end of `checkUnload`.
//
// Doubly-linked list, so that we can remove marked objects. The list is formed
// with the `next` and `prev` fields of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *objects = NULL;

// The `objects` list is moved here before the unload check. Marked objects are
// moved back to `objects`. Remaining objects are freed.
static ObjectCode *old_objects = NULL;

// Number of objects that we want to unload. When this value is 0 we skip
// static object marking during GC and `checkUnload`.
//
// Not static: `evacuate` uses this value to skip static object marking when it
// is 0.
//
// Incremented in `unloadObj_`, decremented as we unload objects in
// `checkUnload`.
int n_unloaded_objects = 0;

// List of objects that we don't want to unload (i.e. we haven't called
// `unloadObj` on these yet). Used as the root set for the unload check in
// `checkUnload`. Objects are added with `loadObj_` and removed with
// `unloadObj_`.
//
// The list is formed with the `next_loaded_object` field of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *loaded_objects;

// Section index table for currently loaded objects. New indices are added by
// `loadObj_`, indices of unloaded objects are removed in `checkUnload`. Used to
// map static closures to their ObjectCode.
static OCSectionIndices *global_s_indices = NULL;

static OCSectionIndices *createOCSectionIndices(void)
{
    // TODO (osa): Maybe initialize as empty (without allocation) and allocate
    // on first insertion?
    OCSectionIndices *s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
    int capacity = 1024;
    s_indices->capacity = capacity;
    s_indices->n_sections = 0;
    s_indices->sorted = true;
    s_indices->unloaded = false;
    s_indices->indices = stgMallocBytes(capacity * sizeof(OCSectionIndex),
        "OCSectionIndices::indices");
    return s_indices;
}

static void freeOCSectionIndices(OCSectionIndices *s_indices)
{
    // These were allocated with stgMallocBytes, so release them with stgFree
    // rather than plain free().
    stgFree(s_indices->indices);
    stgFree(s_indices);
}

void initUnloadCheck()
{
    global_s_indices = createOCSectionIndices();
}

void exitUnloadCheck()
{
    freeOCSectionIndices(global_s_indices);
    global_s_indices = NULL;
}

static int cmpSectionIndex(const void* indexa, const void *indexb)
{
    W_ s1 = ((OCSectionIndex*)indexa)->start;
    W_ s2 = ((OCSectionIndex*)indexb)->start;
    if (s1 < s2) {
        return -1;
    } else if (s1 > s2) {
        return 1;
    }
    return 0;
}

static void reserveOCSectionIndices(OCSectionIndices *s_indices, int len)
{
    int current_capacity = s_indices->capacity;
    int current_len = s_indices->n_sections;
    if (current_capacity - current_len >= len) {
        return;
    }

    // Round up to the nearest power of 2
    int new_capacity = 1 << (int)ceil(log2(current_len + len));

    OCSectionIndex *old_indices = s_indices->indices;
    OCSectionIndex *new_indices = stgMallocBytes(new_capacity * sizeof(OCSectionIndex),
        "reserveOCSectionIndices");

    for (int i = 0; i < current_len; ++i) {
        new_indices[i] = old_indices[i];
    }

    s_indices->capacity = new_capacity;
    s_indices->indices = new_indices;

    // old_indices was allocated with stgMallocBytes, so release it with
    // stgFree.
    stgFree(old_indices);
}
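
// For example, with n_sections = 1500 and an incoming object contributing
// len = 600 sections, current_len + len = 2100 and the table is grown to
// 1 << ceil(log2(2100)) = 4096 entries. Growing to a power of two keeps the
// amortized cost of repeated insertions linear.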

// Insert object section indices of a single ObjectCode. Invalidates 'sorted'
// state.
void insertOCSectionIndices(ObjectCode *oc)
{
    reserveOCSectionIndices(global_s_indices, oc->n_sections);
    global_s_indices->sorted = false;

    int s_i = global_s_indices->n_sections;
    for (int i = 0; i < oc->n_sections; i++) {
        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
            global_s_indices->indices[s_i].start = (W_)oc->sections[i].start;
            global_s_indices->indices[s_i].end = (W_)oc->sections[i].start
                + oc->sections[i].size;
            global_s_indices->indices[s_i].oc = oc;
            s_i++;
        }
    }

    global_s_indices->n_sections = s_i;

    // Add object to 'objects' list
    if (objects != NULL) {
        objects->prev = oc;
    }
    oc->next = objects;
    objects = oc;
}

static int findSectionIdx(OCSectionIndices *s_indices, const void *addr);

static void removeOCSectionIndices(OCSectionIndices *s_indices, ObjectCode *oc)
{
    // To avoid quadratic behavior in checkUnload we set the `oc` fields of the
    // unloaded object's indices to NULL here. Removing the unused entries is
    // done in `removeRemovedOCSections`.

    s_indices->unloaded = true;

    for (int i = 0; i < oc->n_sections; i++) {
        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
            int section_idx = findSectionIdx(s_indices, oc->sections[i].start);
            if (section_idx != -1) {
                s_indices->indices[section_idx].oc = NULL;
            }
        }
    }
}

static void sortOCSectionIndices(OCSectionIndices *s_indices) {
    if (s_indices->sorted) {
        return;
    }

    qsort(s_indices->indices,
        s_indices->n_sections,
        sizeof(OCSectionIndex),
        cmpSectionIndex);

    s_indices->sorted = true;
}

static void removeRemovedOCSections(OCSectionIndices *s_indices) {
    if (!s_indices->unloaded) {
        return;
    }

    int next_free_idx = 0;
    for (int i = 0; i < s_indices->n_sections; ++i) {
        if (s_indices->indices[i].oc == NULL) {
            // free entry, skip
        } else if (i == next_free_idx) {
            ++next_free_idx;
        } else {
            s_indices->indices[next_free_idx] = s_indices->indices[i];
            ++next_free_idx;
        }
    }

    s_indices->n_sections = next_free_idx;
    // The table is compact again: clear the flag so that the next call can
    // return early if nothing was unloaded in the meantime.
    s_indices->unloaded = false;
}
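
// For example (A, B, C being live ObjectCodes), compaction rewrites the
// entries in place in a single pass:
//
//     before: [A, NULL, B, NULL, NULL, C]   n_sections = 6
//     after:  [A, B, C]                     n_sections = 3
//
// The pass preserves the relative order of the surviving entries, so a table
// that was sorted stays sorted.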

// Returns -1 if not found
static int findSectionIdx(OCSectionIndices *s_indices, const void *addr) {
    ASSERT(s_indices->sorted);

    W_ w_addr = (W_)addr;
    if (s_indices->n_sections <= 0) {
        return -1;
    }
    if (w_addr < s_indices->indices[0].start) {
        return -1;
    }

    int left = 0, right = s_indices->n_sections;
    while (left + 1 < right) {
        int mid = (left + right)/2;
        W_ w_mid = s_indices->indices[mid].start;
        if (w_mid <= w_addr) {
            left = mid;
        } else {
            right = mid;
        }
    }
    ASSERT(w_addr >= s_indices->indices[left].start);
    if (w_addr < s_indices->indices[left].end) {
        return left;
    }
    return -1;
}
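
// Walking through the hypothetical table from the comment near the top of
// this file ([0x1000,0x1800) -> A, [0x1800,0x1a00) -> A, [0x4000,0x5000) -> B):
// looking up addr = 0x1900 narrows [left, right) down to the entry with the
// greatest start <= addr, i.e. [0x1800,0x1a00), and since 0x1900 < 0x1a00 the
// lookup returns that index. For addr = 0x3000 the search also lands on
// [0x1800,0x1a00), but 0x3000 >= 0x1a00, so the address falls in a gap between
// sections and the function returns -1.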

static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
    int oc_idx = findSectionIdx(s_indices, addr);

    if (oc_idx == -1) {
        return NULL;
    }

    return s_indices->indices[oc_idx].oc;
}

static bool markObjectLive(void *data STG_UNUSED, StgWord key, const void *value STG_UNUSED) {
    ObjectCode *oc = (ObjectCode*)key;

    // N.B. we may be called by the parallel GC and therefore this must be
    // thread-safe. To avoid taking the linker_mutex in the fast path
    // (when the object is already marked) we do an atomic exchange here and
    // only take the lock in the case that the object is unmarked.
    if (xchg(&oc->mark, object_code_mark_bit) == object_code_mark_bit) {
        return true; // for hash table iteration
    }

    ACQUIRE_LOCK(&linker_mutex);
    // Remove from 'old_objects' list
    if (oc->prev != NULL) {
        // TODO(osa): Maybe 'prev' should be a pointer to the referencing
        // *field* ? (instead of referencing *object*)
        oc->prev->next = oc->next;
    } else {
        old_objects = oc->next;
    }
    if (oc->next != NULL) {
        oc->next->prev = oc->prev;
    }

    // Add it to 'objects' list
    oc->prev = NULL;
    oc->next = objects;
    if (objects != NULL) {
        objects->prev = oc;
    }
    objects = oc;
    RELEASE_LOCK(&linker_mutex);

    // Mark its dependencies
    iterHashTable(oc->dependencies, NULL, markObjectLive);

    return true; // for hash table iteration
}
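
// For example, if old_objects is A <-> B <-> C and B is marked, the splice
// above leaves old_objects as A <-> C and pushes B onto the front of
// `objects`. Dependencies are then marked recursively, so if B's code refers
// to C, C is moved over as well and only A remains a candidate for unloading.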

void markObjectCode(const void *addr)
{
    if (global_s_indices == NULL) {
        return;
    }

    // This should be checked at the call site
    ASSERT(!HEAP_ALLOCED(addr));

    ObjectCode *oc = findOC(global_s_indices, addr);
    if (oc != NULL) {
        // Mark the object code and its dependencies
        markObjectLive(NULL, (W_)oc, NULL);
    }
}

// Returns whether or not the GC that follows needs to mark code for potential
// unloading.
bool prepareUnloadCheck()
{
    if (global_s_indices == NULL) {
        return false;
    }

    removeRemovedOCSections(global_s_indices);
    sortOCSectionIndices(global_s_indices);

    ASSERT(old_objects == NULL);

    object_code_mark_bit = ~object_code_mark_bit;
    old_objects = objects;
    objects = NULL;
    return true;
}
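
// Flipping `object_code_mark_bit` effectively starts a new mark epoch: every
// mark from the previous unload check becomes stale automatically, so no pass
// is needed to clear mark bits. For instance, with the bit at 0, live objects
// end up with mark == 0; flipping the bit to 0xff then makes all of them
// appear unmarked until markObjectLive visits them again.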

void checkUnload()
{
    if (global_s_indices == NULL) {
        return;
    }

    // At this point we've marked all dynamically loaded static objects
    // (including their dependencies) during GC, but not the root set of object
    // code (loaded_objects). Mark the roots first, then unload any unmarked
    // objects.

    OCSectionIndices *s_indices = global_s_indices;
    ASSERT(s_indices->sorted);

    // Mark roots
    for (ObjectCode *oc = loaded_objects; oc != NULL; oc = oc->next_loaded_object) {
        markObjectLive(NULL, (W_)oc, NULL);
    }

    // Free unmarked objects
    ObjectCode *next = NULL;
    for (ObjectCode *oc = old_objects; oc != NULL; oc = next) {
        next = oc->next;

        removeOCSectionIndices(s_indices, oc);

        // Symbols should be removed by unloadObj_.
        // NB (osa): If this assertion doesn't hold then freeObjectCode below
        // will corrupt symhash as keys of that table live in ObjectCodes. If
        // you see a segfault in a hash table operation in the linker (in a
        // non-debug RTS) then it's probably because this assertion did not
        // hold.
        ASSERT(oc->symbols == NULL);

        freeObjectCode(oc);
        n_unloaded_objects -= 1;
    }

    old_objects = NULL;
}
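// For reference, a sketch of how the GC is expected to drive the functions in
// this file during a major collection (simplified; the real call sites are in
// the storage manager and in `evacuate`):
//
//     if (prepareUnloadCheck()) {      // before GC: snapshot `objects`
//         // ... during GC, for each static closure being evacuated:
//         //     markObjectCode(closure);
//         checkUnload();               // after GC: free unmarked objects
//     }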