1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2018
4  *
5  * Non-moving garbage collector and allocator: Mark phase
6  *
7  * ---------------------------------------------------------------------------*/
8 
9 #include "Rts.h"
10 // We call evacuate, which expects the thread-local gc_thread to be valid;
11 // this is sometimes declared as a register variable, so it is necessary to
12 // include the declaration so that the compiler doesn't clobber the register.
13 #include "NonMovingMark.h"
14 #include "NonMovingShortcut.h"
15 #include "NonMoving.h"
16 #include "BlockAlloc.h"  /* for countBlocks */
17 #include "HeapAlloc.h"
18 #include "Task.h"
19 #include "Trace.h"
20 #include "HeapUtils.h"
21 #include "Printer.h"
22 #include "Schedule.h"
23 #include "Weak.h"
24 #include "Stats.h"
25 #include "STM.h"
26 #include "MarkWeak.h"
27 #include "sm/Storage.h"
28 #include "CNF.h"
29 
30 static bool check_in_nonmoving_heap(StgClosure *p);
31 static void mark_closure (MarkQueue *queue, const StgClosure *p, StgClosure **origin);
32 static void mark_tso (MarkQueue *queue, StgTSO *tso);
33 static void mark_stack (MarkQueue *queue, StgStack *stack);
34 static void mark_PAP_payload (MarkQueue *queue,
35                               StgClosure *fun,
36                               StgClosure **payload,
37                               StgWord size);
38 
39 // How many Array# entries to add to the mark queue at once?
40 #define MARK_ARRAY_CHUNK_LENGTH 128
41 
42 /* Note [Large objects in the non-moving collector]
43  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
44  * The nonmoving collector keeps a separate list of its large objects, apart from
45  * oldest_gen->large_objects. There are two reasons for this:
46  *
47  *  1. oldest_gen is mutated by minor collections, which happen concurrently with
48  *     marking
49  *  2. the non-moving collector needs a consistent picture of the snapshotted large objects
50  *
51  * At the beginning of a major collection, nonmovingCollect takes the objects in
52  * oldest_gen->large_objects (which includes all large objects evacuated by the
53  * moving collector) and adds them to nonmoving_large_objects. This is the set
54  * of large objects that will be collected in the current major GC cycle.
55  *
56  * As the concurrent mark phase proceeds, the large objects in
57  * nonmoving_large_objects that are found to be live are moved to
58  * nonmoving_marked_large_objects. During sweep we discard all objects that remain
59  * in nonmoving_large_objects and move everything in nonmoving_marked_large_objects
60  * back to nonmoving_large_objects.
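 *
 * For illustration, that sweep-time hand-over amounts to roughly the following
 * (a sketch of the behaviour described above; the real code lives in the sweep
 * phase, not in this file, and freeChain here stands for whatever actually
 * releases the dead blocks):
 *
 *     freeChain(nonmoving_large_objects);      // whatever is left is dead
 *     nonmoving_large_objects = nonmoving_marked_large_objects;
 *     n_nonmoving_large_blocks = n_nonmoving_marked_large_blocks;
 *     nonmoving_marked_large_objects = NULL;
 *     n_nonmoving_marked_large_blocks = 0;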
61  *
62  * During minor collections large objects will accumulate on
63  * oldest_gen->large_objects, where they will be picked up by the nonmoving
64  * collector and moved to nonmoving_large_objects during the next major GC.
65  * When this happens the block gets its BF_NONMOVING_SWEEPING flag set to
66  * indicate that it is part of the snapshot and consequently should be marked by
67  * the nonmoving mark phase.
68  *
69  * Note that pinned object blocks are treated as large objects containing only
70  * a single object. That is, the block has a single mark flag (BF_MARKED) and we
71  * consequently will trace the pointers of only one object per block. However,
72  * this is okay since the only type of pinned object supported by GHC is the
73  * pinned ByteArray#, which has no pointers.
74  */
75 
76 bdescr *nonmoving_large_objects = NULL;
77 bdescr *nonmoving_marked_large_objects = NULL;
78 memcount n_nonmoving_large_blocks = 0;
79 memcount n_nonmoving_marked_large_blocks = 0;
80 
81 bdescr *nonmoving_compact_objects = NULL;
82 bdescr *nonmoving_marked_compact_objects = NULL;
83 memcount n_nonmoving_compact_blocks = 0;
84 memcount n_nonmoving_marked_compact_blocks = 0;
85 
86 #if defined(THREADED_RTS)
87 /* Protects everything above. Furthermore, we only set the BF_MARKED bit of
88  * large object blocks when this is held. This ensures that the write barrier
89  * (e.g. finish_upd_rem_set_mark) and the collector (mark_closure) don't try to
90  * move the same large object to nonmoving_marked_large_objects more than once.
91  */
92 static Mutex nonmoving_large_objects_mutex;
93 // Note that we don't need a similar lock for compact objects because we never
94 // mark a compact object eagerly in a write barrier; all compact objects are
95 // marked by the mark thread, so there can't be any races here.
96 #endif
97 
98 /*
99  * Where we keep our threads during collection: to safely resurrect threads we
100  * must have a snapshot of the threads that lived in the nonmoving heap at the
101  * time that the snapshot was taken.
102  */
103 StgTSO *nonmoving_old_threads = END_TSO_QUEUE;
104 /* Same for weak pointers */
105 StgWeak *nonmoving_old_weak_ptr_list = NULL;
106 /* Because we can "tidy" thread and weak lists concurrently with a minor GC we
107  * need to move marked threads and weaks to these lists until we pause for sync.
108  * Then we move them to oldest_gen lists. */
109 StgTSO *nonmoving_threads = END_TSO_QUEUE;
110 StgWeak *nonmoving_weak_ptr_list = NULL;
111 
112 #if defined(DEBUG)
113 // TODO (osa): Document
114 StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
115 #endif
116 
117 /* Note [Update remembered set]
118  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
119  * The concurrent non-moving collector uses a remembered set to ensure
120  * that its marking is consistent with the snapshot invariant defined in
121  * the design. This remembered set, known as the update remembered set,
122  * records all pointers that have been overwritten since the beginning
123  * of the concurrent mark. This ensures that concurrent mutation cannot hide
124  * pointers to live objects from the nonmoving garbage collector.
125  *
126  * The update remembered set is maintained via a write barrier that
127  * is enabled whenever a concurrent mark is active. This write barrier
128  * can be found in a number of places:
129  *
130  *  - In rts/Primops.cmm in primops responsible for modifying mutable closures
131  *    (e.g. MVARs, MUT_VARs, etc.)
132  *
133  *  - In rts/STM.c, where the STM machinery mutates TVars and their watch queues
134  *
135  *  - In the dirty_* functions found in rts/Storage.c where we dirty MVARs,
136  *    MUT_VARs, TSOs and STACKs. STACK is a somewhat special case, as described
137  *    in Note [StgStack dirtiness flags and concurrent marking] in TSO.h.
138  *
139  *  - In the code generated by the STG code generator for pointer array writes
140  *
141  *  - In thunk updates (e.g. updateWithIndirection) to ensure that the free
142  *    variables of the original thunk remain reachable.
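 *
 * At each of these sites the barrier has roughly the following shape (an
 * illustrative sketch rather than the exact code at any one site; `cap` and
 * `old_value` stand for the current capability and whichever pointer is about
 * to be overwritten):
 *
 *     if (nonmoving_write_barrier_enabled) {
 *         // push the about-to-be-overwritten pointer so that the concurrent
 *         // mark still sees the heap as it was at the snapshot
 *         updateRemembSetPushClosure(cap, (StgClosure *) old_value);
 *     }
 *     // ... perform the actual write ...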
143  *
144  * There is also a read barrier to handle weak references, as described in
145  * Note [Concurrent read barrier on deRefWeak#].
146  *
147  * The representation of the update remembered set is the same as that of
148  * the mark queue. For efficiency, each capability maintains its own local
149  * accumulator of remembered set entries. When a capability fills its
150  * accumulator it is linked in to the global remembered set
151  * (upd_rem_set_block_list), where it is consumed by the mark phase.
152  *
153  * The mark phase is responsible for freeing update remembered set block
154  * allocations.
155  *
156  * Note that we eagerly flush update remembered sets during minor GCs as
157  * described in Note [Eager update remembered set flushing].
158  *
159  *
160  * Note [Eager update remembered set flushing]
161  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
162  *
163  * We eagerly flush update remembered sets during minor GCs to avoid scenarios
164  * like the following which could result in long sync pauses:
165  *
166  *  1. We start a major GC, all thread stacks are added to the mark queue.
167  *  2. The concurrent mark thread starts.
168  *  3. The mutator is allowed to resume. One mutator thread T is scheduled and marks its
169  *     stack into its local update remembered set.
170  *  4. The mark thread eventually encounters the mutator thread's stack but
171  *     sees that it has already been marked; skips it.
172  *  5. Thread T continues running but does not push enough to its update
173  *     remembered set to require a flush.
174  *  6. Eventually the mark thread finishes marking and requests a final sync.
175  *  7. The thread T flushes its update remembered set.
176  *  8. We find that a large fraction of the heap (namely the subset that is
177  *     only reachable from the thread T's stack) needs to be marked, incurring
178  *     a large sync pause
179  *
180  * We avoid this by periodically (during minor GC) forcing a flush of the
181  * update remembered set.
182  *
183  * A better (but more complex) approach that would be worth trying in the
184  * future is the following:
185  *
186  *  1. Concurrent mark phase is on-going
187  *  2. Mark thread runs out of things to mark
188  *  3. Mark thread sends a signal to capabilities requesting that they send
189  *     their update remembered sets without suspending their execution
190  *  4. The mark thread marks everything it was sent; runs out of things to mark
191  *  5. Mark thread initiates a sync
192  *  6. Capabilities send their final update remembered sets and suspend execution
193  *  7. Mark thread marks everything it was sent
194  *  8. Mark thread allows capabilities to resume.
195  *
196  * However, this is obviously a fair amount of complexity and so far the
197  * periodic eager flushing approach has been sufficient.
198  *
199  *
200  * Note [Concurrent read barrier on deRefWeak#]
201  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
202  *
203  * In general the non-moving GC assumes that all pointers reachable from a
204  * marked object are themselves marked (or in the mark queue). However,
205  * weak pointers are an obvious exception to this rule. In particular,
206  * deRefWeakPtr# allows the mutator to turn a weak reference into a strong
207  * reference. This interacts badly with concurrent collection. For
208  * instance, consider this program:
209  *
210  *     f :: a -> b -> IO b
211  *     f k v = do
212  *         -- assume that k and v are the only references to the
213  *         -- closures to which they refer.
214  *         weak <- mkWeakPtr k v Nothing
215  *
216  *         -- N.B. k is now technically dead since the only reference to it is
217  *         -- weak, but we've not yet had a chance to tombstone the WeakPtr
218  *         -- (which will happen in the course of major GC).
219  *         performMajorGC
220  *         -- Now we are running concurrently with the mark...
221  *
222  *         Just x <- deRefWeak weak
223  *         -- We have now introduced a reference to `v`, which would not
224  *         -- otherwise be marked: the only reference to `v` at the time the
225  *         -- snapshot was taken was via a WeakPtr.
226  *         return x
227  *
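 * Consequently deRefWeak# runs a read barrier: while the write barrier is
 * enabled, the value being handed back to the mutator is pushed to the update
 * remembered set so that the mark phase still traces it. Schematically (a
 * sketch of the idea, not the primop's actual Cmm; `cap` is the current
 * capability and `w` the StgWeak being dereferenced):
 *
 *     if (nonmoving_write_barrier_enabled) {
 *         updateRemembSetPushClosure(cap, w->value);
 *     }
 *     // ... return w->value to the caller ...
 *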
228  */
229 bdescr *upd_rem_set_block_list = NULL;
230 #if defined(THREADED_RTS)
231 static Mutex upd_rem_set_lock;
232 
233 /* Used during the mark/sweep phase transition to track how many capabilities
234  * have pushed their update remembered sets. Protected by upd_rem_set_lock.
235  */
236 static volatile StgWord upd_rem_set_flush_count = 0;
237 
238 /* Signaled by each capability when it has flushed its update remembered set */
239 static Condition upd_rem_set_flushed_cond;
240 #endif
241 
242 /* Indicates to mutators that the write barrier must be respected. Set while
243  * concurrent mark is running.
244  */
245 StgWord nonmoving_write_barrier_enabled = false;
246 
247 /* Used to provide the current mark queue to the young generation
248  * collector for scavenging.
249  */
250 MarkQueue *current_mark_queue = NULL;
251 
252 /* Initialise update remembered set data structures */
253 void nonmovingMarkInitUpdRemSet() {
254 #if defined(THREADED_RTS)
255     initMutex(&upd_rem_set_lock);
256     initCondition(&upd_rem_set_flushed_cond);
257     initMutex(&nonmoving_large_objects_mutex);
258 #endif
259 }
260 
261 #if defined(THREADED_RTS) && defined(DEBUG)
262 static uint32_t markQueueLength(MarkQueue *q);
263 #endif
264 static void init_mark_queue_(MarkQueue *queue);
265 
266 /* Transfers the given capability's update-remembered set to the global
267  * remembered set.
268  *
269  * Really the argument type should be UpdRemSet* but this would be rather
270  * inconvenient without polymorphism.
271  */
272 void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
273 {
274     if (markQueueIsEmpty(rset)) return;
275 
276     // find the tail of the queue
277     bdescr *start = rset->blocks;
278     bdescr *end = start;
279     while (end->link != NULL)
280         end = end->link;
281 
282     // add the blocks to the global remembered set
283     ACQUIRE_LOCK(&upd_rem_set_lock);
284     end->link = upd_rem_set_block_list;
285     upd_rem_set_block_list = start;
286     RELEASE_LOCK(&upd_rem_set_lock);
287 
288     // Reset remembered set
289     ACQUIRE_SM_LOCK;
290     init_mark_queue_(rset);
291     rset->is_upd_rem_set = true;
292     RELEASE_SM_LOCK;
293 }
294 
295 #if defined(THREADED_RTS)
296 /* Called by capabilities to flush their update remembered sets when
297  * synchronising with the non-moving collector as it transitions from mark to
298  * sweep phase.
299  */
300 void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
301 {
302     debugTrace(DEBUG_nonmoving_gc,
303                "Capability %d flushing update remembered set: %d",
304                cap->no, markQueueLength(&cap->upd_rem_set.queue));
305     traceConcUpdRemSetFlush(cap);
306     nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue);
307     atomic_inc(&upd_rem_set_flush_count, 1);
308     signalCondition(&upd_rem_set_flushed_cond);
309     // After this, mutation will remain suspended until nonmovingFinishFlush
310     // releases the capabilities.
311 }
312 
313 /* Request that all capabilities flush their update remembered sets and suspend
314  * execution until further notice.
315  */
316 void nonmovingBeginFlush(Task *task)
317 {
318     debugTrace(DEBUG_nonmoving_gc, "Starting update remembered set flush...");
319     traceConcSyncBegin();
320     upd_rem_set_flush_count = 0;
321     stat_startNonmovingGcSync();
322     stopAllCapabilitiesWith(NULL, task, SYNC_FLUSH_UPD_REM_SET);
323 
324     // XXX: We may have been given a capability via releaseCapability (i.e. a
325     // task suspended due to a foreign call) in which case our requestSync
326     // logic won't have been hit. Make sure that everyone so far has flushed.
327     // Ideally we want to mark asynchronously with syncing.
328     for (uint32_t i = 0; i < n_capabilities; i++) {
329         nonmovingFlushCapUpdRemSetBlocks(capabilities[i]);
330     }
331 }
332 
333 /* Wait until a capability has flushed its update remembered set. Returns true
334  * if all capabilities have flushed.
335  */
336 bool nonmovingWaitForFlush()
337 {
338     ACQUIRE_LOCK(&upd_rem_set_lock);
339     debugTrace(DEBUG_nonmoving_gc, "Flush count %d", upd_rem_set_flush_count);
340     bool finished = upd_rem_set_flush_count == n_capabilities;
341     if (!finished) {
342         waitCondition(&upd_rem_set_flushed_cond, &upd_rem_set_lock);
343     }
344     RELEASE_LOCK(&upd_rem_set_lock);
345     return finished;
346 }
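
/* For orientation, the flush entry points in this file are intended to be
 * driven by the mark thread roughly as follows (a sketch; the actual driver
 * is the concurrent mark loop in NonMoving.c):
 *
 *     nonmovingBeginFlush(task);          // stop mutators, flush known caps
 *     do {
 *         // ... mark everything flushed so far ...
 *     } while (!nonmovingWaitForFlush());
 *     // ... final marking, weak-pointer processing, etc. ...
 *     nonmovingFinishFlush(task);         // reset UpdRemSets, resume mutators
 */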
347 
348 /* Note [Unintentional marking in resurrectThreads]
349  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
350  * In both moving and non-moving collectors threads found to be unreachable are
351  * evacuated/marked and then resurrected with resurrectThreads. resurrectThreads
352  * raises an exception in the unreachable thread via raiseAsync, which mutates
353  * the heap. These mutations push entries to the UpdRemSet of the thread's
354  * capability. Here's an example backtrace where this happens:
355  *
356  *     #0  updateRemembSetPushClosure
357  *     #1  0x000000000072b363 in dirty_TVAR
358  *     #2  0x00000000007162e5 in remove_watch_queue_entries_for_trec
359  *     #3  0x0000000000717098 in stmAbortTransaction
360  *     #4  0x000000000070c6eb in raiseAsync
361  *     #5  0x000000000070b473 in throwToSingleThreaded__
362  *     #6  0x000000000070b4ab in throwToSingleThreaded
363  *     #7  0x00000000006fce82 in resurrectThreads
364  *     #8  0x00000000007215db in nonmovingMark_
365  *     #9  0x0000000000721438 in nonmovingConcurrentMark
366  *     #10 0x00007f1ee81cd6db in start_thread
367  *     #11 0x00007f1ee850688f in clone
368  *
369  * However we don't really want to run write barriers when calling
370  * resurrectThreads here, because we're in a GC pause, and overwritten values
371  * are definitely gone forever (as opposed to being inserted in a marked object
372  * or kept in registers and used later).
373  *
374  * When this happens, if we don't reset the UpdRemSets, then in the next mark
375  * we see the objects that the previous mark's resurrectThreads added to the
376  * UpdRemSets, and mark those too. This keeps unreachable objects alive and
377  * breaks weak finalization and thread resurrection (both of which rely on
378  * things becoming unreachable). As an example, stm048 fails when
379  * we get this wrong, because when we do raiseAsync on a thread that was blocked
380  * on an STM transaction we mutate a TVAR_WATCH_QUEUE, which has a reference to
381  * the TSO that was running the STM transaction. If the TSO becomes unreachable
382  * again in the next GC we don't realize this, because it was added to an
383  * UpdRemSet in the previous GC's mark phase, because of raiseAsync.
384  *
385  * To fix this we clear all UpdRemSets in nonmovingFinishFlush, right before
386  * releasing capabilities. This is somewhat inefficient (we allow adding objects
387  * to UpdRemSets, only to later reset them), but the only case where we add to
388  * UpdRemSets during mark is resurrectThreads, and I don't think we resurrect
389  * so many threads that we fill the UpdRemSets and allocate new blocks. So
390  * pushing an UpdRemSet in this case is really fast, and resetting is even
391  * faster (we just update a pointer).
392  *
393  * TODO (osa): What if we actually marked UpdRemSets in this case, in the mark
394  * loop? Would that work? Or what would break?
395  */
396 
397 /* Notify capabilities that the synchronisation is finished; they may resume
398  * execution.
399  */
400 void nonmovingFinishFlush(Task *task)
401 {
402     // See Note [Unintentional marking in resurrectThreads]
403     for (uint32_t i = 0; i < n_capabilities; i++) {
404         reset_upd_rem_set(&capabilities[i]->upd_rem_set);
405     }
406     // Also reset upd_rem_set_block_list in case some of the UpdRemSets were
407     // filled and we flushed them.
408     freeChain_lock(upd_rem_set_block_list);
409     upd_rem_set_block_list = NULL;
410 
411     debugTrace(DEBUG_nonmoving_gc, "Finished update remembered set flush...");
412     traceConcSyncEnd();
413     stat_endNonmovingGcSync();
414     releaseAllCapabilities(n_capabilities, NULL, task);
415 }
416 #endif
417 
418 /*********************************************************
419  * Pushing to either the mark queue or remembered set
420  *********************************************************/
421 
422 STATIC_INLINE void
423 push (MarkQueue *q, const MarkQueueEnt *ent)
424 {
425     // Are we at the end of the block?
426     if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
427         // Yes, this block is full.
428         if (q->is_upd_rem_set) {
429             nonmovingAddUpdRemSetBlocks(q);
430         } else {
431             // allocate a fresh block.
432             ACQUIRE_SM_LOCK;
433             bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
434             bd->link = q->blocks;
435             q->blocks = bd;
436             q->top = (MarkQueueBlock *) bd->start;
437             q->top->head = 0;
438             RELEASE_SM_LOCK;
439         }
440     }
441 
442     q->top->entries[q->top->head] = *ent;
443     q->top->head++;
444 }
445 
446 /* A variant of push to be used by the minor GC when it encounters a reference
447  * to an object in the non-moving heap. In contrast to the other push
448  * operations this uses the gc_alloc_block_sync spinlock instead of the
449  * SM_LOCK to allocate new blocks in the event that the mark queue is full.
450  */
451 void
452 markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
453 {
454     if (!check_in_nonmoving_heap(p)) {
455         return;
456     }
457 
458     /* We should not make it here if we are doing a deadlock detect GC.
459      * See Note [Deadlock detection under nonmoving collector].
460      * This is actually no longer true due to the call in nonmovingScavengeOne
461      * introduced by Note [Dirty flags in the non-moving collector]
462      * (see NonMoving.c).
463      */
464     //ASSERT(!deadlock_detect_gc);
465 
466     // Are we at the end of the block?
467     if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
468         // Yes, this block is full.
469         // allocate a fresh block.
470         ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
471         bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
472         bd->link = q->blocks;
473         q->blocks = bd;
474         q->top = (MarkQueueBlock *) bd->start;
475         q->top->head = 0;
476         RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
477     }
478 
479     MarkQueueEnt ent = {
480         .mark_closure = {
481             .p = TAG_CLOSURE(MARK_CLOSURE, UNTAG_CLOSURE(p)),
482             .origin = NULL,
483         }
484     };
485     q->top->entries[q->top->head] = ent;
486     q->top->head++;
487 }
488 
489 static inline
490 void push_closure (MarkQueue *q,
491                    StgClosure *p,
492                    StgClosure **origin)
493 {
494 #if defined(DEBUG)
495     ASSERT(!HEAP_ALLOCED_GC(p) || (Bdescr((StgPtr) p)->gen == oldest_gen));
496     ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
497     // Commenting out: too slow
498     // if (RtsFlags.DebugFlags.sanity) {
499     //     assert_in_nonmoving_heap((P_)p);
500     //     if (origin)
501     //         assert_in_nonmoving_heap((P_)origin);
502     // }
503 #endif
504 
505     // This must be true as origin points to a pointer and therefore must be
506     // word-aligned. However, we check this as otherwise we would confuse this
507     // with a mark_array entry
508     ASSERT(((uintptr_t) origin & 0x3) == 0);
509 
510     MarkQueueEnt ent = {
511         .mark_closure = {
512             .p = TAG_CLOSURE(MARK_CLOSURE, UNTAG_CLOSURE(p)),
513             .origin = origin,
514         }
515     };
516     push(q, &ent);
517 }
518 
519 static
520 void push_array (MarkQueue *q,
521                  const StgMutArrPtrs *array,
522                  StgWord start_index)
523 {
524     // TODO: Push this into callers where they already have the Bdescr
525     if (HEAP_ALLOCED_GC(array) && (Bdescr((StgPtr) array)->gen != oldest_gen))
526         return;
527 
528     MarkQueueEnt ent = {
529         .mark_array = {
530             .array = (const StgMutArrPtrs *) TAG_CLOSURE(MARK_ARRAY, UNTAG_CLOSURE((StgClosure *) array)),
531             .start_index = start_index,
532         }
533     };
534     push(q, &ent);
535 }
536 
537 static
538 void push_thunk_srt (MarkQueue *q, const StgInfoTable *info)
539 {
540     const StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(info);
541     if (thunk_info->i.srt) {
542         push_closure(q, (StgClosure*)GET_SRT(thunk_info), NULL);
543     }
544 }
545 
546 static
547 void push_fun_srt (MarkQueue *q, const StgInfoTable *info)
548 {
549     const StgFunInfoTable *fun_info = itbl_to_fun_itbl(info);
550     if (fun_info->i.srt) {
551         push_closure(q, (StgClosure*)GET_FUN_SRT(fun_info), NULL);
552     }
553 }
554 
555 /*********************************************************
556  * Pushing to the update remembered set
557  *
558  * upd_rem_set_push_* functions are directly called by
559  * mutators and need to check whether the value is in
560  * non-moving heap.
561  *********************************************************/
562 
563 // Check whether the object is traced by the non-moving collector. This holds
564 // in the following cases:
565 //
566 // - Object is in the non-moving heap
567 // - Object is a large object (BF_LARGE) marked as BF_NONMOVING
568 // - Object is static (HEAP_ALLOCED_GC(obj) == false)
569 //
570 static
571 bool check_in_nonmoving_heap(StgClosure *p) {
572     if (HEAP_ALLOCED_GC(p)) {
573         // This works for both large and small objects:
574         return Bdescr((P_)p)->flags & BF_NONMOVING;
575     } else {
576         return true; // a static object
577     }
578 }
579 
580 /* Push the free variables of a (now-evaluated) thunk to the
581  * update remembered set.
582  */
583 inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk)
584 {
585     const StgInfoTable *info;
586     do {
587         info = *(StgInfoTable* volatile*) &thunk->header.info;
588     } while (info == &stg_WHITEHOLE_info);
589 
590     const StgThunkInfoTable *thunk_info = THUNK_INFO_PTR_TO_STRUCT(info);
591     updateRemembSetPushThunkEager(cap, thunk_info, thunk);
592 }
593 
594 /* Push the free variables of a thunk to the update remembered set.
595  * This is called by the thunk update code (e.g. updateWithIndirection) before
596  * we update the indirectee to ensure that the thunk's free variables remain
597  * visible to the concurrent collector.
598  *
599  * See Note [Update remembered set].
600  */
601 void updateRemembSetPushThunkEager(Capability *cap,
602                                    const StgThunkInfoTable *info,
603                                    StgThunk *thunk)
604 {
605     /* N.B. info->i.type mustn't be WHITEHOLE */
606     MarkQueue *queue = &cap->upd_rem_set.queue;
607     switch (info->i.type) {
608     case THUNK:
609     case THUNK_1_0:
610     case THUNK_0_1:
611     case THUNK_2_0:
612     case THUNK_1_1:
613     case THUNK_0_2:
614     {
615         push_thunk_srt(queue, &info->i);
616 
617         for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) {
618             if (check_in_nonmoving_heap(thunk->payload[i])) {
619                 // Don't bother to push origin; it makes the barrier needlessly
620                 // expensive with little benefit.
621                 push_closure(queue, thunk->payload[i], NULL);
622             }
623         }
624         break;
625     }
626     case AP:
627     {
628         StgAP *ap = (StgAP *) thunk;
629         if (check_in_nonmoving_heap(ap->fun)) {
630             push_closure(queue, ap->fun, NULL);
631         }
632         mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
633         break;
634     }
635     case THUNK_SELECTOR:
636     case BLACKHOLE:
637         // TODO: This is right, right?
638         break;
639     // The selector optimization performed by the nonmoving mark may have
640     // overwritten a thunk which we are updating with an indirection.
641     case IND:
642     {
643         StgInd *ind = (StgInd *) thunk;
644         if (check_in_nonmoving_heap(ind->indirectee)) {
645             push_closure(queue, ind->indirectee, NULL);
646         }
647         break;
648     }
649     default:
650         barf("updateRemembSetPushThunk: invalid thunk pushed: p=%p, type=%d",
651              thunk, info->i.type);
652     }
653 }
654 
655 void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p)
656 {
657     updateRemembSetPushThunk(regTableToCapability(reg), p);
658 }
659 
660 inline void updateRemembSetPushClosure(Capability *cap, StgClosure *p)
661 {
662     if (check_in_nonmoving_heap(p)) {
663         MarkQueue *queue = &cap->upd_rem_set.queue;
664         push_closure(queue, p, NULL);
665     }
666 }
667 
668 void updateRemembSetPushClosure_(StgRegTable *reg, struct StgClosure_ *p)
669 {
670     updateRemembSetPushClosure(regTableToCapability(reg), p);
671 }
672 
673 STATIC_INLINE bool needs_upd_rem_set_mark(StgClosure *p)
674 {
675     // TODO: Deduplicate with mark_closure
676     bdescr *bd = Bdescr((StgPtr) p);
677     if (bd->gen != oldest_gen) {
678         return false;
679     } else if (bd->flags & BF_LARGE) {
680         if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
681             return false;
682         } else {
683             return ! (bd->flags & BF_MARKED);
684         }
685     } else {
686         struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
687         nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
688         return nonmovingGetMark(seg, block_idx) != nonmovingMarkEpoch;
689     }
690 }
691 
692 /* Set the mark bit; only to be called *after* we have fully marked the closure */
693 STATIC_INLINE void finish_upd_rem_set_mark(StgClosure *p)
694 {
695     bdescr *bd = Bdescr((StgPtr) p);
696     if (bd->flags & BF_LARGE) {
697         // Someone else may have already marked it.
698         ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
699         if (! (bd->flags & BF_MARKED)) {
700             bd->flags |= BF_MARKED;
701             dbl_link_remove(bd, &nonmoving_large_objects);
702             dbl_link_onto(bd, &nonmoving_marked_large_objects);
703             n_nonmoving_large_blocks -= bd->blocks;
704             n_nonmoving_marked_large_blocks += bd->blocks;
705         }
706         RELEASE_LOCK(&nonmoving_large_objects_mutex);
707     } else {
708         struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
709         nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
710         nonmovingSetMark(seg, block_idx);
711     }
712 }
713 
714 void updateRemembSetPushTSO(Capability *cap, StgTSO *tso)
715 {
716     if (needs_upd_rem_set_mark((StgClosure *) tso)) {
717         debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: TSO %p", tso);
718         mark_tso(&cap->upd_rem_set.queue, tso);
719         finish_upd_rem_set_mark((StgClosure *) tso);
720     }
721 }
722 
723 void updateRemembSetPushStack(Capability *cap, StgStack *stack)
724 {
725     // N.B. caller responsible for checking nonmoving_write_barrier_enabled
726     if (needs_upd_rem_set_mark((StgClosure *) stack)) {
727         StgWord8 marking = stack->marking;
728         // See Note [StgStack dirtiness flags and concurrent marking]
729         if (cas_word8(&stack->marking, marking, nonmovingMarkEpoch)
730               != nonmovingMarkEpoch) {
731             // We have claimed the right to mark the stack.
732             debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: STACK %p", stack->sp);
733             mark_stack(&cap->upd_rem_set.queue, stack);
734             finish_upd_rem_set_mark((StgClosure *) stack);
735             return;
736         } else {
737             // The concurrent GC has claimed the right to mark the stack.
738             // Wait until it finishes marking before proceeding with
739             // mutation.
740             while (needs_upd_rem_set_mark((StgClosure *) stack))
741 #if defined(PARALLEL_GC)
742                 busy_wait_nop(); // TODO: Spinning here is unfortunate
743 #else
744                 ;
745 #endif
746             return;
747         }
748     }
749 }
750 
751 /*********************************************************
752  * Pushing to the mark queue
753  *********************************************************/
754 
755 void markQueuePush (MarkQueue *q, const MarkQueueEnt *ent)
756 {
757     push(q, ent);
758 }
759 
760 void markQueuePushClosure (MarkQueue *q,
761                            StgClosure *p,
762                            StgClosure **origin)
763 {
764     // TODO: Push this into callers where they already have the Bdescr
765     if (check_in_nonmoving_heap(p)) {
766         push_closure(q, p, origin);
767     }
768 }
769 
770 /* TODO: Do we really never want to specify the origin here? */
771 void markQueueAddRoot (MarkQueue* q, StgClosure** root)
772 {
773     markQueuePushClosure(q, *root, NULL);
774 }
775 
776 /* Push a closure to the mark queue without origin information */
777 void markQueuePushClosure_ (MarkQueue *q, StgClosure *p)
778 {
779     markQueuePushClosure(q, p, NULL);
780 }
781 
782 void markQueuePushFunSrt (MarkQueue *q, const StgInfoTable *info)
783 {
784     push_fun_srt(q, info);
785 }
786 
787 void markQueuePushThunkSrt (MarkQueue *q, const StgInfoTable *info)
788 {
789     push_thunk_srt(q, info);
790 }
791 
792 void markQueuePushArray (MarkQueue *q,
793                             const StgMutArrPtrs *array,
794                             StgWord start_index)
795 {
796     push_array(q, array, start_index);
797 }
798 
799 /*********************************************************
800  * Popping from the mark queue
801  *********************************************************/
802 
803 // Returns invalid MarkQueueEnt if queue is empty.
804 static MarkQueueEnt markQueuePop_ (MarkQueue *q)
805 {
806     MarkQueueBlock *top;
807 
808 again:
809     top = q->top;
810 
811     // Are we at the beginning of the block?
812     if (top->head == 0) {
813         // Is this the first block of the queue?
814         if (q->blocks->link == NULL) {
815             // Yes, therefore queue is empty...
816             MarkQueueEnt none = { .null_entry = { .p = NULL } };
817             return none;
818         } else {
819             // No, unwind to the previous block and try popping again...
820             bdescr *old_block = q->blocks;
821             q->blocks = old_block->link;
822             q->top = (MarkQueueBlock*)q->blocks->start;
823             ACQUIRE_SM_LOCK;
824             freeGroup(old_block); // TODO: hold on to a block to avoid repeated allocation/deallocation?
825             RELEASE_SM_LOCK;
826             goto again;
827         }
828     }
829 
830     top->head--;
831     MarkQueueEnt ent = top->entries[top->head];
832     return ent;
833 }
834 
835 static MarkQueueEnt markQueuePop (MarkQueue *q)
836 {
837 #if MARK_PREFETCH_QUEUE_DEPTH == 0
838     return markQueuePop_(q);
839 #else
840     unsigned int i = q->prefetch_head;
841     while (nonmovingMarkQueueEntryType(&q->prefetch_queue[i]) == NULL_ENTRY) {
842         MarkQueueEnt new = markQueuePop_(q);
843         if (nonmovingMarkQueueEntryType(&new) == NULL_ENTRY) {
844             // Mark queue is empty; look for any valid entries in the prefetch
845             // queue
846             for (unsigned int j = (i+1) % MARK_PREFETCH_QUEUE_DEPTH;
847                  j != i;
848                  j = (j+1) % MARK_PREFETCH_QUEUE_DEPTH)
849             {
850                 if (nonmovingMarkQueueEntryType(&q->prefetch_queue[j]) != NULL_ENTRY) {
851                     i = j;
852                     goto done;
853                 }
854             }
855             return new;
856         }
857 
858         // The entry may not be a MARK_CLOSURE but it doesn't matter, our
859         // MarkQueueEnt encoding always places the pointer to the object to be
860         // marked first.
861         prefetchForRead(&new.mark_closure.p->header.info);
862         prefetchForRead(Bdescr((StgPtr) new.mark_closure.p));
863         q->prefetch_queue[i] = new;
864         i = (i + 1) % MARK_PREFETCH_QUEUE_DEPTH;
865     }
866 
867   done:
868     ;
869     MarkQueueEnt ret = q->prefetch_queue[i];
870     q->prefetch_queue[i].null_entry.p = NULL;
871     q->prefetch_head = i;
872     return ret;
873 #endif
874 }
875 
876 /*********************************************************
877  * Creating and destroying MarkQueues and UpdRemSets
878  *********************************************************/
879 
880 /* Must hold sm_mutex. */
881 static void init_mark_queue_ (MarkQueue *queue)
882 {
883     bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
884     queue->blocks = bd;
885     queue->top = (MarkQueueBlock *) bd->start;
886     queue->top->head = 0;
887 #if MARK_PREFETCH_QUEUE_DEPTH > 0
888     memset(&queue->prefetch_queue, 0, sizeof(queue->prefetch_queue));
889     queue->prefetch_head = 0;
890 #endif
891 }
892 
893 /* Must hold sm_mutex. */
894 void initMarkQueue (MarkQueue *queue)
895 {
896     init_mark_queue_(queue);
897     queue->is_upd_rem_set = false;
898 }
899 
900 /* Must hold sm_mutex. */
901 void init_upd_rem_set (UpdRemSet *rset)
902 {
903     init_mark_queue_(&rset->queue);
904     rset->queue.is_upd_rem_set = true;
905 }
906 
907 void reset_upd_rem_set (UpdRemSet *rset)
908 {
909     // UpdRemSets always have one block for the mark queue. This assertion is
910     // here so that we update this code if we ever change that.
911     ASSERT(rset->queue.blocks->link == NULL);
912     rset->queue.top->head = 0;
913 }
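
/* For orientation, the life cycle of an UpdRemSet is roughly the following
 * (a sketch pieced together from the functions in this file; exactly where
 * init_upd_rem_set is called from is outside the scope of this file):
 *
 *     UpdRemSet rset;
 *     init_upd_rem_set(&rset);                   // set up, one per capability
 *     // ... mutator runs; updateRemembSetPush* fills rset.queue ...
 *     nonmovingAddUpdRemSetBlocks(&rset.queue);  // flush to the global list
 *     reset_upd_rem_set(&rset);                  // cleared when the sync ends
 */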
914 
915 void freeMarkQueue (MarkQueue *queue)
916 {
917     freeChain_lock(queue->blocks);
918 }
919 
920 #if defined(THREADED_RTS) && defined(DEBUG)
921 static uint32_t
922 markQueueLength (MarkQueue *q)
923 {
924     uint32_t n = 0;
925     for (bdescr *block = q->blocks; block; block = block->link) {
926         MarkQueueBlock *queue = (MarkQueueBlock*)block->start;
927         n += queue->head;
928     }
929     return n;
930 }
931 #endif
932 
933 
934 /*********************************************************
935  * Marking
936  *********************************************************/
937 
938 /*
939  * N.B. Mutation of TRecHeaders is completely unprotected by any write
940  * barrier. Consequently it's quite important that we deeply mark
941  * any outstanding transactions.
942  */
943 static void
944 mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
945 {
946     while (trec != NO_TREC) {
947         StgTRecChunk *chunk = trec->current_chunk;
948         markQueuePushClosure_(queue, (StgClosure *) trec);
949         markQueuePushClosure_(queue, (StgClosure *) chunk);
950         while (chunk != END_STM_CHUNK_LIST) {
951             for (StgWord i=0; i < chunk->next_entry_idx; i++) {
952                 TRecEntry *ent = &chunk->entries[i];
953                 markQueuePushClosure_(queue, (StgClosure *) ent->tvar);
954                 markQueuePushClosure_(queue, ent->expected_value);
955                 markQueuePushClosure_(queue, ent->new_value);
956             }
957             chunk = chunk->prev_chunk;
958         }
959         trec = trec->enclosing_trec;
960     }
961 }
962 
963 static void
964 mark_tso (MarkQueue *queue, StgTSO *tso)
965 {
966     // TODO: Clear dirty if contains only old gen objects
967 
968     if (tso->bound != NULL) {
969         markQueuePushClosure_(queue, (StgClosure *) tso->bound->tso);
970     }
971 
972     markQueuePushClosure_(queue, (StgClosure *) tso->blocked_exceptions);
973     markQueuePushClosure_(queue, (StgClosure *) tso->bq);
974     mark_trec_header(queue, tso->trec);
975     markQueuePushClosure_(queue, (StgClosure *) tso->stackobj);
976     markQueuePushClosure_(queue, (StgClosure *) tso->_link);
977     if (   tso->why_blocked == BlockedOnMVar
978         || tso->why_blocked == BlockedOnMVarRead
979         || tso->why_blocked == BlockedOnBlackHole
980         || tso->why_blocked == BlockedOnMsgThrowTo
981         || tso->why_blocked == NotBlocked
982         ) {
983         markQueuePushClosure_(queue, tso->block_info.closure);
984     }
985 }
986 
987 static void
988 do_push_closure (StgClosure **p, void *user)
989 {
990     MarkQueue *queue = (MarkQueue *) user;
991     // TODO: Origin? need reference to containing closure
992     markQueuePushClosure_(queue, *p);
993 }
994 
995 static void
996 mark_large_bitmap (MarkQueue *queue,
997                    StgClosure **p,
998                    StgLargeBitmap *large_bitmap,
999                    StgWord size)
1000 {
1001     walk_large_bitmap(do_push_closure, p, large_bitmap, size, queue);
1002 }
1003 
1004 static void
1005 mark_small_bitmap (MarkQueue *queue, StgClosure **p, StgWord size, StgWord bitmap)
1006 {
1007     while (size > 0) {
1008         if ((bitmap & 1) == 0) {
1009             // TODO: Origin?
1010             markQueuePushClosure(queue, *p, NULL);
1011         }
1012         p++;
1013         bitmap = bitmap >> 1;
1014         size--;
1015     }
1016 }
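
/* For example, with size = 3 and bitmap = 0b010: p[0] and p[2] have clear
 * bits, so they are treated as pointers and pushed, while p[1] has its bit
 * set and is skipped as a non-pointer word.
 */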
1017 
1018 static GNUC_ATTR_HOT
1019 void mark_PAP_payload (MarkQueue *queue,
1020                        StgClosure *fun,
1021                        StgClosure **payload,
1022                        StgWord size)
1023 {
1024     const StgFunInfoTable *fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(fun));
1025     ASSERT(fun_info->i.type != PAP);
1026     StgPtr p = (StgPtr) payload;
1027 
1028     StgWord bitmap;
1029     switch (fun_info->f.fun_type) {
1030     case ARG_GEN:
1031         bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1032         goto small_bitmap;
1033     case ARG_GEN_BIG:
1034         mark_large_bitmap(queue, payload, GET_FUN_LARGE_BITMAP(fun_info), size);
1035         break;
1036     case ARG_BCO:
1037         mark_large_bitmap(queue, payload, BCO_BITMAP(fun), size);
1038         break;
1039     default:
1040         bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1041     small_bitmap:
1042         mark_small_bitmap(queue, (StgClosure **) p, size, bitmap);
1043         break;
1044     }
1045 }
1046 
1047 /* Helper for mark_stack; returns next stack frame. */
1048 static StgPtr
1049 mark_arg_block (MarkQueue *queue, const StgFunInfoTable *fun_info, StgClosure **args)
1050 {
1051     StgWord bitmap, size;
1052 
1053     StgPtr p = (StgPtr)args;
1054     switch (fun_info->f.fun_type) {
1055     case ARG_GEN:
1056         bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1057         size = BITMAP_SIZE(fun_info->f.b.bitmap);
1058         goto small_bitmap;
1059     case ARG_GEN_BIG:
1060         size = GET_FUN_LARGE_BITMAP(fun_info)->size;
1061         mark_large_bitmap(queue, (StgClosure**)p, GET_FUN_LARGE_BITMAP(fun_info), size);
1062         p += size;
1063         break;
1064     default:
1065         bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1066         size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1067     small_bitmap:
1068         mark_small_bitmap(queue, (StgClosure**)p, size, bitmap);
1069         p += size;
1070         break;
1071     }
1072     return p;
1073 }
1074 
1075 static GNUC_ATTR_HOT void
1076 mark_stack_ (MarkQueue *queue, StgPtr sp, StgPtr spBottom)
1077 {
1078     ASSERT(sp <= spBottom);
1079 
1080     while (sp < spBottom) {
1081         const StgRetInfoTable *info = get_ret_itbl((StgClosure *)sp);
1082         switch (info->i.type) {
1083         case UPDATE_FRAME:
1084         {
1085             // See Note [upd-black-hole] in rts/Scav.c
1086             StgUpdateFrame *frame = (StgUpdateFrame *) sp;
1087             markQueuePushClosure_(queue, frame->updatee);
1088             sp += sizeofW(StgUpdateFrame);
1089             continue;
1090         }
1091 
1092             // small bitmap (< 32 entries, or 64 on a 64-bit machine)
1093         case CATCH_STM_FRAME:
1094         case CATCH_RETRY_FRAME:
1095         case ATOMICALLY_FRAME:
1096         case UNDERFLOW_FRAME:
1097         case STOP_FRAME:
1098         case CATCH_FRAME:
1099         case RET_SMALL:
1100         {
1101             StgWord bitmap = BITMAP_BITS(info->i.layout.bitmap);
1102             StgWord size   = BITMAP_SIZE(info->i.layout.bitmap);
1103             // NOTE: the payload starts immediately after the info-ptr, we
1104             // don't have an StgHeader in the same sense as a heap closure.
1105             sp++;
1106             mark_small_bitmap(queue, (StgClosure **) sp, size, bitmap);
1107             sp += size;
1108         }
1109         follow_srt:
1110             if (info->i.srt) {
1111                 markQueuePushClosure_(queue, (StgClosure*)GET_SRT(info));
1112             }
1113             continue;
1114 
1115         case RET_BCO: {
1116             sp++;
1117             markQueuePushClosure_(queue, *(StgClosure**)sp);
1118             StgBCO *bco = (StgBCO *)*sp;
1119             sp++;
1120             StgWord size = BCO_BITMAP_SIZE(bco);
1121             mark_large_bitmap(queue, (StgClosure **) sp, BCO_BITMAP(bco), size);
1122             sp += size;
1123             continue;
1124         }
1125 
1126           // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1127         case RET_BIG:
1128         {
1129             StgWord size;
1130 
1131             size = GET_LARGE_BITMAP(&info->i)->size;
1132             sp++;
1133             mark_large_bitmap(queue, (StgClosure **) sp, GET_LARGE_BITMAP(&info->i), size);
1134             sp += size;
1135             // and don't forget to follow the SRT
1136             goto follow_srt;
1137         }
1138 
1139         case RET_FUN:
1140         {
1141             StgRetFun *ret_fun = (StgRetFun *)sp;
1142             const StgFunInfoTable *fun_info;
1143 
1144             markQueuePushClosure_(queue, ret_fun->fun);
1145             fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
1146             sp = mark_arg_block(queue, fun_info, ret_fun->payload);
1147             goto follow_srt;
1148         }
1149 
1150         default:
1151             barf("mark_stack: weird activation record found on stack: %d", (int)(info->i.type));
1152         }
1153     }
1154 }
1155 
1156 static GNUC_ATTR_HOT void
1157 mark_stack (MarkQueue *queue, StgStack *stack)
1158 {
1159     // TODO: Clear dirty if contains only old gen objects
1160 
1161     mark_stack_(queue, stack->sp, stack->stack + stack->stack_size);
1162 }
1163 
1164 /* See Note [Static objects under the nonmoving collector].
1165  *
1166  * Returns true if the object needs to be marked.
1167  */
1168 static bool
1169 bump_static_flag(StgClosure **link_field, StgClosure *q STG_UNUSED)
1170 {
1171     while (1) {
1172         StgWord link = (StgWord) *link_field;
1173         StgWord new = (link & ~STATIC_BITS) | static_flag;
1174         if ((link & STATIC_BITS) == static_flag)
1175             return false;
1176         else if (cas((StgVolatilePtr) link_field, link, new) == link) {
1177             return true;
1178         }
1179     }
1180 }
1181 
1182 /* N.B. p0 may be tagged */
1183 static GNUC_ATTR_HOT void
1184 mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
1185 {
1186     StgClosure *p = (StgClosure*)p0;
1187 
1188  try_again:
1189     ;
1190     bdescr *bd = NULL;
1191     StgClosure *p_next = NULL;
1192     StgWord tag = GET_CLOSURE_TAG(p);
1193     p = UNTAG_CLOSURE(p);
1194 
1195 #   define PUSH_FIELD(obj, field)                                \
1196         markQueuePushClosure(queue,                              \
1197                                 (StgClosure *) (obj)->field,     \
1198                                 (StgClosure **) &(obj)->field)
1199 
1200     if (!HEAP_ALLOCED_GC(p)) {
1201         const StgInfoTable *info = get_itbl(p);
1202         StgHalfWord type = info->type;
1203 
1204         if (type == CONSTR_0_1 || type == CONSTR_0_2 || type == CONSTR_NOCAF) {
1205             // no need to put these on the static linked list, they don't need
1206             // to be marked.
1207             return;
1208         }
1209 
1210         switch (type) {
1211 
1212         case THUNK_STATIC:
1213             if (info->srt != 0) {
1214                 if (bump_static_flag(THUNK_STATIC_LINK((StgClosure *)p), p)) {
1215                     markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above
1216                 }
1217             }
1218             goto done;
1219 
1220         case FUN_STATIC:
1221             if (info->srt != 0 || info->layout.payload.ptrs != 0) {
1222                 if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) {
1223                     markQueuePushFunSrt(queue, info); // TODO this function repeats the check above
1224 
1225                     // a FUN_STATIC can also be an SRT, so it may have pointer
1226                     // fields.  See Note [SRTs] in CmmBuildInfoTables, specifically
1227                     // the [FUN] optimisation.
1228                     // TODO (osa) I don't understand this comment
1229                     for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
1230                         PUSH_FIELD(p, payload[i]);
1231                     }
1232                 }
1233             }
1234             goto done;
1235 
1236         case IND_STATIC:
1237             if (bump_static_flag(IND_STATIC_LINK((StgClosure *)p), p)) {
1238                 PUSH_FIELD((StgInd *) p, indirectee);
1239             }
1240             goto done;
1241 
1242         case CONSTR:
1243         case CONSTR_1_0:
1244         case CONSTR_2_0:
1245         case CONSTR_1_1:
1246             if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) {
1247                 for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
1248                     PUSH_FIELD(p, payload[i]);
1249                 }
1250             }
1251             goto done;
1252 
1253         case WHITEHOLE:
1254             while (*(StgInfoTable* volatile*) &p->header.info == &stg_WHITEHOLE_info);
1255                 // busy_wait_nop(); // FIXME
1256             goto try_again;
1257 
1258         default:
1259             barf("mark_closure(static): strange closure type %d", (int)(info->type));
1260         }
1261     }
1262 
1263     bd = Bdescr((StgPtr) p);
1264 
1265     if (bd->gen != oldest_gen) {
1266         // Here we have an object living outside of the non-moving heap. While
1267         // we likely evacuated nearly everything to the nonmoving heap during
1268         // preparation there are nevertheless a few ways in which we might trace
1269         // a reference into younger generations:
1270         //
1271         //  * a mutable object might have been updated
1272         //  * we might have aged an object
1273         goto done;
1274     }
1275 
1276     ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info);
1277 
1278     ASSERT(!IS_FORWARDING_PTR(p->header.info));
1279 
1280     // N.B. only the first block of a compact region is guaranteed to carry
1281     // BF_NONMOVING; consequently we must separately check for BF_COMPACT.
1282     if (bd->flags & (BF_COMPACT | BF_NONMOVING)) {
1283 
1284         if (bd->flags & BF_COMPACT) {
1285             StgCompactNFData *str = objectGetCompact((StgClosure*)p);
1286             bd = Bdescr((P_)str);
1287 
1288             if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
1289                 // Not in the snapshot
1290                 return;
1291             }
1292 
1293             if (! (bd->flags & BF_MARKED)) {
1294                 dbl_link_remove(bd, &nonmoving_compact_objects);
1295                 dbl_link_onto(bd, &nonmoving_marked_compact_objects);
1296                 StgWord blocks = str->totalW / BLOCK_SIZE_W;
1297                 n_nonmoving_compact_blocks -= blocks;
1298                 n_nonmoving_marked_compact_blocks += blocks;
1299                 bd->flags |= BF_MARKED;
1300             }
1301 
1302             // N.B. the object being marked is in a compact region so by
1303             // definition there is no need to do any tracing here.
1304             goto done;
1305         } else if (bd->flags & BF_LARGE) {
1306             if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
1307                 // Not in the snapshot
1308                 goto done;
1309             }
1310             if (bd->flags & BF_MARKED) {
1311                 goto done;
1312             }
1313         } else {
1314             struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
1315             nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
1316 
1317             /* We don't mark blocks that,
1318              *  - were not live at the time that the snapshot was taken, or
1319              *  - we have already marked this cycle
1320              */
1321             uint8_t mark = nonmovingGetMark(seg, block_idx);
1322             /* Don't mark things we've already marked (since we may loop) */
1323             if (mark == nonmovingMarkEpoch)
1324                 goto done;
1325 
1326             StgClosure *snapshot_loc =
1327               (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
1328             if (p >= snapshot_loc && mark == 0) {
1329                 /*
1330                  * In this case we are looking at a block that wasn't allocated
1331                  * at the time that the snapshot was taken. We mustn't trace
1332                  * things above the allocation pointer that aren't marked since
1333                  * they may not be valid objects.
1334                  */
1335                 goto done;
1336             }
1337         }
1338     }
1339 
1340     // A pinned object that is still attached to a capability (because it's not
1341     // filled yet). No need to trace it: pinned objects can't contain pointers.
1342     else if (bd->flags & BF_PINNED) {
1343 #if defined(DEBUG)
1344         bool found_it = false;
1345         for (uint32_t i = 0; i < n_capabilities; ++i) {
1346             if (capabilities[i]->pinned_object_block == bd) {
1347                 found_it = true;
1348                 break;
1349             }
1350         }
1351         ASSERT(found_it);
1352 #endif
1353         return; // we don't update origin here! TODO(osa): explain this
1354     }
1355 
1356     else {
1357         barf("Strange closure in nonmoving mark: %p", p);
1358     }
1359 
1360     /////////////////////////////////////////////////////
1361     // Trace pointers
1362     /////////////////////////////////////////////////////
1363 
1364     const StgInfoTable *info = get_itbl(p);
1365     switch (info->type) {
1366 
1367     case MVAR_CLEAN:
1368     case MVAR_DIRTY: {
1369         StgMVar *mvar = (StgMVar *) p;
1370         PUSH_FIELD(mvar, head);
1371         PUSH_FIELD(mvar, tail);
1372         PUSH_FIELD(mvar, value);
1373         break;
1374     }
1375 
1376     case TVAR: {
1377         StgTVar *tvar = ((StgTVar *)p);
1378         PUSH_FIELD(tvar, current_value);
1379         PUSH_FIELD(tvar, first_watch_queue_entry);
1380         break;
1381     }
1382 
    case FUN_2_0:
        markQueuePushFunSrt(queue, info);
        PUSH_FIELD(p, payload[1]);
        PUSH_FIELD(p, payload[0]);
        break;

    case THUNK_2_0: {
        StgThunk *thunk = (StgThunk *) p;
        markQueuePushThunkSrt(queue, info);
        PUSH_FIELD(thunk, payload[1]);
        PUSH_FIELD(thunk, payload[0]);
        break;
    }

    case CONSTR_2_0:
        PUSH_FIELD(p, payload[1]);
        PUSH_FIELD(p, payload[0]);
        break;

    case THUNK_1_0:
        markQueuePushThunkSrt(queue, info);
        PUSH_FIELD((StgThunk *) p, payload[0]);
        break;

    case FUN_1_0:
        markQueuePushFunSrt(queue, info);
        PUSH_FIELD(p, payload[0]);
        break;

    case CONSTR_1_0:
        PUSH_FIELD(p, payload[0]);
        break;

    case THUNK_0_1:
        markQueuePushThunkSrt(queue, info);
        break;

    case FUN_0_1:
        markQueuePushFunSrt(queue, info);
        break;

    case CONSTR_0_1:
    case CONSTR_0_2:
        break;

    case THUNK_0_2:
        markQueuePushThunkSrt(queue, info);
        break;

    case FUN_0_2:
        markQueuePushFunSrt(queue, info);
        break;

    case THUNK_1_1:
        markQueuePushThunkSrt(queue, info);
        PUSH_FIELD((StgThunk *) p, payload[0]);
        break;

    case FUN_1_1:
        markQueuePushFunSrt(queue, info);
        PUSH_FIELD(p, payload[0]);
        break;

    case CONSTR_1_1:
        PUSH_FIELD(p, payload[0]);
        break;

    case FUN:
        markQueuePushFunSrt(queue, info);
        goto gen_obj;

    case THUNK: {
        markQueuePushThunkSrt(queue, info);
        for (StgWord i = 0; i < info->layout.payload.ptrs; i++) {
            StgClosure **field = &((StgThunk *) p)->payload[i];
            markQueuePushClosure(queue, *field, field);
        }
        break;
    }

    gen_obj:
    case CONSTR:
    case CONSTR_NOCAF:
    case WEAK:
    case PRIM:
    {
        for (StgWord i = 0; i < info->layout.payload.ptrs; i++) {
            StgClosure **field = &((StgClosure *) p)->payload[i];
            markQueuePushClosure(queue, *field, field);
        }
        break;
    }
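    // N.B. info->layout.payload.ptrs is the number of pointer words at the
    // start of the payload; any non-pointer words follow them and need no
    // tracing.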

    case BCO: {
        StgBCO *bco = (StgBCO *)p;
        PUSH_FIELD(bco, instrs);
        PUSH_FIELD(bco, literals);
        PUSH_FIELD(bco, ptrs);
        break;
    }


    case IND: {
        PUSH_FIELD((StgInd *) p, indirectee);
        if (origin != NULL) {
            p_next = ((StgInd*)p)->indirectee;
        }
        break;
    }

    case BLACKHOLE: {
        PUSH_FIELD((StgInd *) p, indirectee);
        StgClosure *indirectee = ((StgInd*)p)->indirectee;
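        // A zero tag tells us nothing about the indirectee: it may still be,
        // e.g., a TSO or BLOCKING_QUEUE rather than the blackhole's final
        // value. We therefore only shortcut origin to indirectees carrying a
        // non-zero pointer tag, which are certainly evaluated values.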
        if (GET_CLOSURE_TAG(indirectee) == 0 || origin == NULL) {
            // do nothing
        } else {
            p_next = indirectee;
        }
        break;
    }

    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        PUSH_FIELD((StgMutVar *)p, var);
        break;

    case BLOCKING_QUEUE: {
        StgBlockingQueue *bq = (StgBlockingQueue *)p;
        PUSH_FIELD(bq, bh);
        PUSH_FIELD(bq, owner);
        PUSH_FIELD(bq, queue);
        PUSH_FIELD(bq, link);
        break;
    }

    case THUNK_SELECTOR:
        if (RtsFlags.GcFlags.nonmovingSelectorOpt) {
            nonmoving_eval_thunk_selector(queue, (StgSelector*)p, origin);
        } else {
            PUSH_FIELD((StgSelector *) p, selectee);
        }
        break;

    case AP_STACK: {
        StgAP_STACK *ap = (StgAP_STACK *)p;
        PUSH_FIELD(ap, fun);
        mark_stack_(queue, (StgPtr) ap->payload, (StgPtr) ap->payload + ap->size);
        break;
    }

    case PAP: {
        StgPAP *pap = (StgPAP *) p;
        PUSH_FIELD(pap, fun);
        mark_PAP_payload(queue, pap->fun, pap->payload, pap->n_args);
        break;
    }

    case AP: {
        StgAP *ap = (StgAP *) p;
        PUSH_FIELD(ap, fun);
        mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
        break;
    }

    case ARR_WORDS:
        // nothing to follow
        break;

    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
        // TODO: Check this against Scav.c
        markQueuePushArray(queue, (StgMutArrPtrs *) p, 0);
        break;
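    // N.B. these arrays are not traced eagerly here; markQueuePushArray
    // enqueues a MARK_ARRAY entry and the main loop in nonmovingMark marks
    // the elements MARK_ARRAY_CHUNK_LENGTH at a time, bounding the work done
    // per queue entry.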

    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: {
        StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs *) p;
        for (StgWord i = 0; i < arr->ptrs; i++) {
            StgClosure **field = &arr->payload[i];
            markQueuePushClosure(queue, *field, field);
        }
        break;
    }

    case TSO:
        mark_tso(queue, (StgTSO *) p);
        break;

    case STACK: {
        // See Note [StgStack dirtiness flags and concurrent marking]
        StgStack *stack = (StgStack *) p;
        StgWord8 marking = stack->marking;

        // N.B. stack->marking must be != nonmovingMarkEpoch unless
        // someone has already marked it.
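        // cas_word8 returns the value previously held in stack->marking; if
        // that value was already nonmovingMarkEpoch then someone else claimed
        // the stack before us.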
        if (cas_word8(&stack->marking, marking, nonmovingMarkEpoch)
              != nonmovingMarkEpoch) {
            // We have claimed the right to mark the stack.
            mark_stack(queue, stack);
        } else {
            // A mutator has already started marking the stack; we just let it
            // do its thing and move on. There's no reason to wait; we know that
            // the stack will be fully marked before we sweep due to the final
            // post-mark synchronization. Most importantly, we do not set its
            // mark bit; the mutator is responsible for doing so.
            goto done;
        }
        break;
    }

    case MUT_PRIM: {
        for (StgHalfWord p_idx = 0; p_idx < info->layout.payload.ptrs; ++p_idx) {
            StgClosure **field = &p->payload[p_idx];
            markQueuePushClosure(queue, *field, field);
        }
        break;
    }

    case TREC_CHUNK: {
        // TODO: Should we abort here? This should have already been marked
        // when we dirtied the TSO
        StgTRecChunk *tc = ((StgTRecChunk *) p);
        PUSH_FIELD(tc, prev_chunk);
        TRecEntry *end = &tc->entries[tc->next_entry_idx];
        for (TRecEntry *e = &tc->entries[0]; e < end; e++) {
            markQueuePushClosure_(queue, (StgClosure *) e->tvar);
            markQueuePushClosure_(queue, (StgClosure *) e->expected_value);
            markQueuePushClosure_(queue, (StgClosure *) e->new_value);
        }
        break;
    }

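    // A WHITEHOLE is a closure that another thread is currently overwriting
    // (it effectively acts as a lock); spin until the real info table is
    // visible again, then retry classification from the top.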
    case WHITEHOLE:
        while (*(StgInfoTable* volatile*) &p->header.info == &stg_WHITEHOLE_info);
        goto try_again;

    case COMPACT_NFDATA:
        break;

    default:
        barf("mark_closure: unimplemented/strange closure type %d @ %p",
             info->type, p);
    }

#   undef PUSH_FIELD

    /* Set the mark bit: it's important that we do this only after we actually push
     * the object's pointers since in the case of marking stacks there may be a
     * mutator waiting for us to finish so it can start execution.
     */
    if (bd->flags & BF_LARGE) {
        /* Marking a large object isn't idempotent since we move it to
         * nonmoving_marked_large_objects; to ensure that we don't repeatedly
         * mark a large object, we only set BF_MARKED on large objects in the
         * nonmoving heap while holding nonmoving_large_objects_mutex.
         */
        ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
        if (! (bd->flags & BF_MARKED)) {
            // Remove the object from nonmoving_large_objects and link it to
            // nonmoving_marked_large_objects
            dbl_link_remove(bd, &nonmoving_large_objects);
            dbl_link_onto(bd, &nonmoving_marked_large_objects);
            n_nonmoving_large_blocks -= bd->blocks;
            n_nonmoving_marked_large_blocks += bd->blocks;
            bd->flags |= BF_MARKED;
        }
        RELEASE_LOCK(&nonmoving_large_objects_mutex);
    } else if (bd->flags & BF_NONMOVING) {
        // TODO: Kill repetition
        struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
        nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
        nonmovingSetMark(seg, block_idx);
        nonmoving_live_words += nonmovingSegmentBlockSize(seg) / sizeof(W_);
    }

    // If we found an indirection to shortcut, keep going.
    if (p_next) {
        p = p_next;
        goto try_again;
    }

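    // If we followed a chain of indirections, try to shortcut the original
    // reference: atomically swing *origin from the closure we started with
    // (p0) to the object we ended up marking, re-applying its pointer tag.
    // The CAS may fail if a mutator changed the field in the meantime; that
    // is benign and we simply leave the field alone.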
done:
    if (origin != NULL && (!HEAP_ALLOCED(p) || bd->flags & BF_NONMOVING)) {
        if (UNTAG_CLOSURE((StgClosure*)p0) != p && *origin == p0) {
            if (cas((StgVolatilePtr)origin, (StgWord)p0, (StgWord)TAG_CLOSURE(tag, p)) == (StgWord)p0) {
                // debugBelch("Thunk optimization successful\n");
            }
        }
    }
}

/* This is the main mark loop.
 * Invariants:
 *
 *  a. nonmovingPrepareMark has been called.
 *  b. the nursery has been fully evacuated into the non-moving generation.
 *  c. the mark queue has been seeded with a set of roots.
 *
 */
GNUC_ATTR_HOT void
nonmovingMark (MarkQueue *queue)
{
    traceConcMarkBegin();
    debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
    unsigned int count = 0;
    while (true) {
        count++;
        MarkQueueEnt ent = markQueuePop(queue);

        switch (nonmovingMarkQueueEntryType(&ent)) {
        case MARK_CLOSURE:
            mark_closure(queue, ent.mark_closure.p, ent.mark_closure.origin);
            break;
        case MARK_ARRAY: {
            const StgMutArrPtrs *arr = (const StgMutArrPtrs *)
                UNTAG_CLOSURE((StgClosure *) ent.mark_array.array);
            StgWord start = ent.mark_array.start_index;
            StgWord end = start + MARK_ARRAY_CHUNK_LENGTH;
            if (end < arr->ptrs) {
                // There is more to be marked after this chunk.
                markQueuePushArray(queue, arr, end);
            } else {
                end = arr->ptrs;
            }
            for (StgWord i = start; i < end; i++) {
                markQueuePushClosure_(queue, arr->payload[i]);
            }
            break;
        }
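        // A MARK_ARRAY entry covers at most MARK_ARRAY_CHUNK_LENGTH elements;
        // for longer arrays the entry pushed above resumes at start_index =
        // end, so a large array is drained one chunk per iteration of this
        // loop rather than all at once.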
        case NULL_ENTRY:
            // Perhaps the update remembered set has more to mark...
            if (upd_rem_set_block_list) {
                ACQUIRE_LOCK(&upd_rem_set_lock);
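                // Adopt the update remembered set's blocks as our new mark
                // queue and free the block(s) we have just exhausted.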
                bdescr *old = queue->blocks;
                queue->blocks = upd_rem_set_block_list;
                queue->top = (MarkQueueBlock *) queue->blocks->start;
                upd_rem_set_block_list = NULL;
                RELEASE_LOCK(&upd_rem_set_lock);

                ACQUIRE_SM_LOCK;
                freeGroup(old);
                RELEASE_SM_LOCK;
            } else {
                // Nothing more to do
                debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count);
                traceConcMarkEnd(count);
                return;
            }
        }
    }
}

// A variant of `isAlive` that works for the non-moving heap. Used for:
//
// - Collecting weak pointers; checking key of a weak pointer.
// - Resurrecting threads; checking if a thread is dead.
// - Sweeping object lists: large_objects, mut_list, stable_name_table.
//
// This may only be used after a full mark but before nonmovingSweep as it
// relies on the correctness of the next_free_snap and mark bitmaps.
bool nonmovingIsAlive (StgClosure *p)
{
    // Ignore static closures. See comments in `isAlive`.
    if (!HEAP_ALLOCED_GC(p)) {
        return true;
    }

    bdescr *bd = Bdescr((P_)p);

    // All non-static objects in the non-moving heap should be marked as
    // BF_NONMOVING
    ASSERT(bd->flags & BF_NONMOVING);

    if (bd->flags & (BF_COMPACT | BF_LARGE)) {
        if (bd->flags & BF_COMPACT) {
            StgCompactNFData *str = objectGetCompact((StgClosure*)p);
            bd = Bdescr((P_)str);
        }
        return (bd->flags & BF_NONMOVING_SWEEPING) == 0
                   // the large object wasn't in the snapshot and therefore wasn't marked
            || (bd->flags & BF_MARKED) != 0;
                   // The object was marked
    } else {
        struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
        nonmoving_block_idx i = nonmovingGetBlockIdx((StgPtr) p);
        uint8_t mark = nonmovingGetMark(seg, i);
        if (i >= nonmovingSegmentInfo(seg)->next_free_snap) {
            // If the object is allocated after next_free_snap then one of the
            // following must be true:
            //
            // * if its mark is 0 then the block was not allocated last time
            //   the segment was swept; however, it may have been allocated since
            //   then and therefore we must conclude that the block is alive.
            //
            // * if its mark is equal to nonmovingMarkEpoch then we found that
            //   the object was alive in the snapshot of the current GC (recall
            //   that this function may only be used after a mark).
            //   Consequently we must conclude that the object is still alive.
            //
            // * if its mark is not equal to nonmovingMarkEpoch then we found
            //   that the object was not reachable in the last snapshot.
            //   Assuming that the mark is complete we can conclude that the
            //   object is dead since the snapshot invariant guarantees that
            //   all objects alive in the snapshot would be marked.
            //
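            // In short:
            //
            //     mark == 0                   => alive (may be a fresh allocation)
            //     mark == nonmovingMarkEpoch  => alive (marked this cycle)
            //     otherwise                   => dead  (stale mark from an earlier cycle)
            //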
            return mark == nonmovingMarkEpoch || mark == 0;
        } else {
            // If the object is below next_free_snap then the snapshot
            // invariant guarantees that it is marked if reachable.
            return mark == nonmovingMarkEpoch;
        }
    }
}

// Check whether a snapshotted object is alive. That is, for an object that we
// know to be in the snapshot, is its mark bit set? It is imperative that the
// object is in the snapshot (e.g. was in the nonmoving heap at the time that
// the snapshot was taken) since we assume that its mark bit reflects its
// reachability.
//
// This is used when
//
// - Collecting weak pointers; checking key of a weak pointer.
// - Resurrecting threads; checking if a thread is dead.
// - Sweeping object lists: large_objects, mut_list, stable_name_table.
//
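// N.B. unlike nonmovingIsAlive, this never treats an unmarked object as live
// merely because it lies above next_free_snap: the caller guarantees that the
// object is in the snapshot, so an unset mark bit really does mean unreachable.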
static bool nonmovingIsNowAlive (StgClosure *p)
{
    // Ignore static closures. See comments in `isAlive`.
    if (!HEAP_ALLOCED_GC(p)) {
        return true;
    }

    bdescr *bd = Bdescr((P_)p);

    // All non-static objects in the non-moving heap should be marked as
    // BF_NONMOVING
    ASSERT(bd->flags & BF_NONMOVING);

    if (bd->flags & BF_LARGE) {
        return (bd->flags & BF_NONMOVING_SWEEPING) == 0
                   // the large object wasn't in the snapshot and therefore wasn't marked
            || (bd->flags & BF_MARKED) != 0;
                   // The object was marked
    } else {
        return nonmovingClosureMarkedThisCycle((P_)p);
    }
}

// Non-moving heap variant of `tidyWeakList`
bool nonmovingTidyWeaks (struct MarkQueue_ *queue)
{
    bool did_work = false;

    StgWeak **last_w = &nonmoving_old_weak_ptr_list;
    StgWeak *next_w;
    for (StgWeak *w = nonmoving_old_weak_ptr_list; w != NULL; w = next_w) {
        if (w->header.info == &stg_DEAD_WEAK_info) {
            // finalizeWeak# was called on the weak
            next_w = w->link;
            *last_w = next_w;
            continue;
        }

        // Otherwise it's a live weak
        ASSERT(w->header.info == &stg_WEAK_info);

        if (nonmovingIsNowAlive(w->key)) {
            nonmovingMarkLiveWeak(queue, w);
            did_work = true;

            // remove this weak ptr from old_weak_ptr list
            *last_w = w->link;
            next_w = w->link;

            // and put it on the weak ptr list
            w->link = nonmoving_weak_ptr_list;
            nonmoving_weak_ptr_list = w;
        } else {
            last_w = &(w->link);
            next_w = w->link;
        }
    }

    return did_work;
}
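
// Rough sketch of the intended use (informal): the caller iterates marking
// and weak-tidying to a fixed point, since marking a live weak's value may
// make further weak keys reachable, e.g.
//
//     do {
//         nonmovingMark(queue);
//     } while (nonmovingTidyWeaks(queue));
//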

void nonmovingMarkDeadWeak (struct MarkQueue_ *queue, StgWeak *w)
{
    if (w->cfinalizers != &stg_NO_FINALIZER_closure) {
        markQueuePushClosure_(queue, w->value);
    }
    markQueuePushClosure_(queue, w->finalizer);
}

void nonmovingMarkLiveWeak (struct MarkQueue_ *queue, StgWeak *w)
{
    ASSERT(nonmovingClosureMarkedThisCycle((P_)w));
    markQueuePushClosure_(queue, w->value);
    markQueuePushClosure_(queue, w->finalizer);
    markQueuePushClosure_(queue, w->cfinalizers);
}

// When we're done with marking, any weak pointers with non-marked keys will be
// considered "dead". We mark values and finalizers of such weaks, and then
// schedule them for finalization in `scheduleFinalizers` (which we run during
// synchronization).
void nonmovingMarkDeadWeaks (struct MarkQueue_ *queue, StgWeak **dead_weaks)
{
    StgWeak *next_w;
    for (StgWeak *w = nonmoving_old_weak_ptr_list; w; w = next_w) {
        ASSERT(!nonmovingClosureMarkedThisCycle((P_)(w->key)));
        nonmovingMarkDeadWeak(queue, w);
        next_w = w->link;
        w->link = *dead_weaks;
        *dead_weaks = w;
    }
}

// Non-moving heap variant of `tidyThreadList`
void nonmovingTidyThreads ()
{
    StgTSO *next;
    StgTSO **prev = &nonmoving_old_threads;
    for (StgTSO *t = nonmoving_old_threads; t != END_TSO_QUEUE; t = next) {

        next = t->global_link;

        // N.B. This thread is in old_threads, consequently we *know* it is in
        // the snapshot and it is therefore safe to rely on the bitmap to
        // determine its reachability.
        if (nonmovingIsNowAlive((StgClosure*)t)) {
            // alive
            *prev = next;

            // move this thread onto threads list
            t->global_link = nonmoving_threads;
            nonmoving_threads = t;
        } else {
            // not alive (yet): leave this thread on the old_threads list
            prev = &(t->global_link);
        }
    }
}

// Mark threads which appear to be dead but still need to be properly torn down
// by resurrectThreads.
void nonmovingResurrectThreads (struct MarkQueue_ *queue, StgTSO **resurrected_threads)
{
    StgTSO *next;
    for (StgTSO *t = nonmoving_old_threads; t != END_TSO_QUEUE; t = next) {
        next = t->global_link;

        switch (t->what_next) {
        case ThreadKilled:
        case ThreadComplete:
            continue;
        default:
            // The thread may be, e.g., deadlocked, in which case we must ensure
            // it isn't swept since resurrectThreads will need to throw it an
            // exception.
            markQueuePushClosure_(queue, (StgClosure*)t);
            t->global_link = *resurrected_threads;
            *resurrected_threads = t;
        }
    }
}

#if defined(DEBUG)

void printMarkQueueEntry (MarkQueueEnt *ent)
{
    switch(nonmovingMarkQueueEntryType(ent)) {
      case MARK_CLOSURE:
        debugBelch("Closure: ");
        printClosure(ent->mark_closure.p);
        break;
      case MARK_ARRAY:
        debugBelch("Array\n");
        break;
      case NULL_ENTRY:
        debugBelch("End of mark\n");
        break;
    }
}

void printMarkQueue (MarkQueue *q)
{
    debugBelch("======== MARK QUEUE ========\n");
    for (bdescr *block = q->blocks; block; block = block->link) {
        MarkQueueBlock *queue = (MarkQueueBlock*)block->start;
        for (uint32_t i = 0; i < queue->head; ++i) {
            printMarkQueueEntry(&queue->entries[i]);
        }
    }
    debugBelch("===== END OF MARK QUEUE ====\n");
}

#endif