1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2018
4 *
5 * Non-moving garbage collector and allocator: Mark phase
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "Rts.h"
10 // We call evacuate, which expects the thread-local gc_thread to be valid;
11 // this is sometimes declared as a register variable, so it is necessary
12 // to include the declaration so that the compiler doesn't clobber the register.
13 #include "NonMovingMark.h"
14 #include "NonMovingShortcut.h"
15 #include "NonMoving.h"
16 #include "BlockAlloc.h" /* for countBlocks */
17 #include "HeapAlloc.h"
18 #include "Task.h"
19 #include "Trace.h"
20 #include "HeapUtils.h"
21 #include "Printer.h"
22 #include "Schedule.h"
23 #include "Weak.h"
24 #include "Stats.h"
25 #include "STM.h"
26 #include "MarkWeak.h"
27 #include "sm/Storage.h"
28 #include "CNF.h"
29
30 static bool check_in_nonmoving_heap(StgClosure *p);
31 static void mark_closure (MarkQueue *queue, const StgClosure *p, StgClosure **origin);
32 static void mark_tso (MarkQueue *queue, StgTSO *tso);
33 static void mark_stack (MarkQueue *queue, StgStack *stack);
34 static void mark_PAP_payload (MarkQueue *queue,
35 StgClosure *fun,
36 StgClosure **payload,
37 StgWord size);
38
39 // How many Array# entries to add to the mark queue at once?
40 #define MARK_ARRAY_CHUNK_LENGTH 128
41
42 /* Note [Large objects in the non-moving collector]
43 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
44 * The nonmoving collector keeps a separate list of its large objects, apart from
45 * oldest_gen->large_objects. There are two reasons for this:
46 *
47 * 1. oldest_gen is mutated by minor collections, which happen concurrently with
48 * marking
49  * 2. the non-moving collector needs a consistent picture of the large objects in its snapshot
50 *
51 * At the beginning of a major collection, nonmovingCollect takes the objects in
52 * oldest_gen->large_objects (which includes all large objects evacuated by the
53 * moving collector) and adds them to nonmoving_large_objects. This is the set
54  * of large objects that will be collected in the current major GC cycle.
55 *
56 * As the concurrent mark phase proceeds, the large objects in
57 * nonmoving_large_objects that are found to be live are moved to
58 * nonmoving_marked_large_objects. During sweep we discard all objects that remain
59  * in nonmoving_large_objects and move everything in nonmoving_marked_large_objects
60 * back to nonmoving_large_objects.
61 *
62 * During minor collections large objects will accumulate on
63 * oldest_gen->large_objects, where they will be picked up by the nonmoving
64 * collector and moved to nonmoving_large_objects during the next major GC.
65 * When this happens the block gets its BF_NONMOVING_SWEEPING flag set to
66 * indicate that it is part of the snapshot and consequently should be marked by
67 * the nonmoving mark phase.
68 *
69 * Note that pinned object blocks are treated as large objects containing only
70 * a single object. That is, the block has a single mark flag (BF_MARKED) and we
71 * consequently will trace the pointers of only one object per block. However,
72 * this is okay since the only type of pinned object supported by GHC is the
73 * pinned ByteArray#, which has no pointers.
74 */
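/* The following is a simplified, illustrative sketch of the sweep-time
 * hand-over described above; it is not the real sweep code (which lives in
 * the sweep module and must also free the discarded blocks):
 *
 *     // Everything still on nonmoving_large_objects was never marked and is
 *     // therefore dead; discard it (freeing elided here).  The survivors
 *     // become the candidate set for the next major GC cycle:
 *     nonmoving_large_objects = nonmoving_marked_large_objects;
 *     n_nonmoving_large_blocks = n_nonmoving_marked_large_blocks;
 *     nonmoving_marked_large_objects = NULL;
 *     n_nonmoving_marked_large_blocks = 0;
 */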
75
76 bdescr *nonmoving_large_objects = NULL;
77 bdescr *nonmoving_marked_large_objects = NULL;
78 memcount n_nonmoving_large_blocks = 0;
79 memcount n_nonmoving_marked_large_blocks = 0;
80
81 bdescr *nonmoving_compact_objects = NULL;
82 bdescr *nonmoving_marked_compact_objects = NULL;
83 memcount n_nonmoving_compact_blocks = 0;
84 memcount n_nonmoving_marked_compact_blocks = 0;
85
86 #if defined(THREADED_RTS)
87 /* Protects everything above. Furthermore, we only set the BF_MARKED bit of
88 * large object blocks when this is held. This ensures that the write barrier
89 * (e.g. finish_upd_rem_set_mark) and the collector (mark_closure) don't try to
90 * move the same large object to nonmoving_marked_large_objects more than once.
91 */
92 static Mutex nonmoving_large_objects_mutex;
93 // Note that we don't need a similar lock for compact objects because we never
94 // mark a compact object eagerly in a write barrier; all compact objects are
95 // marked by the mark thread, so there can't be any races here.
96 #endif
97
98 /*
99 * Where we keep our threads during collection since we must have a snapshot of
100 * the threads that lived in the nonmoving heap at the time that the snapshot
101  * was taken in order to resurrect them safely.
102 */
103 StgTSO *nonmoving_old_threads = END_TSO_QUEUE;
104 /* Same for weak pointers */
105 StgWeak *nonmoving_old_weak_ptr_list = NULL;
106 /* Because we can "tidy" thread and weak lists concurrently with a minor GC we
107 * need to move marked threads and weaks to these lists until we pause for sync.
108 * Then we move them to oldest_gen lists. */
109 StgTSO *nonmoving_threads = END_TSO_QUEUE;
110 StgWeak *nonmoving_weak_ptr_list = NULL;
111
112 #if defined(DEBUG)
113 // TODO (osa): Document
114 StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
115 #endif
116
117 /* Note [Update remembered set]
118 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
119 * The concurrent non-moving collector uses a remembered set to ensure
120 * that its marking is consistent with the snapshot invariant defined in
121 * the design. This remembered set, known as the update remembered set,
122 * records all pointers that have been overwritten since the beginning
123 * of the concurrent mark. This ensures that concurrent mutation cannot hide
124 * pointers to live objects from the nonmoving garbage collector.
125 *
126 * The update remembered set is maintained via a write barrier that
127 * is enabled whenever a concurrent mark is active. This write barrier
128 * can be found in a number of places:
129 *
130 * - In rts/Primops.cmm in primops responsible for modifying mutable closures
131 * (e.g. MVARs, MUT_VARs, etc.)
132 *
133  * - In rts/STM.c, where TVars are mutated
134 *
135 * - In the dirty_* functions found in rts/Storage.c where we dirty MVARs,
136 * MUT_VARs, TSOs and STACKs. STACK is a somewhat special case, as described
137 * in Note [StgStack dirtiness flags and concurrent marking] in TSO.h.
138 *
139 * - In the code generated by the STG code generator for pointer array writes
140 *
141 * - In thunk updates (e.g. updateWithIndirection) to ensure that the free
142 * variables of the original thunk remain reachable.
143 *
144 * There is also a read barrier to handle weak references, as described in
145 * Note [Concurrent read barrier on deRefWeak#].
146 *
147 * The representation of the update remembered set is the same as that of
148 * the mark queue. For efficiency, each capability maintains its own local
149 * accumulator of remembered set entries. When a capability fills its
150  * accumulator, it is linked into the global remembered set
151 * (upd_rem_set_block_list), where it is consumed by the mark phase.
152 *
153 * The mark phase is responsible for freeing update remembered set block
154 * allocations.
155 *
156 * Note that we eagerly flush update remembered sets during minor GCs as
157 * described in Note [Eager update remembered set flushing].
158 *
159 *
160 * Note [Eager update remembered set flushing]
161 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
162 *
163 * We eagerly flush update remembered sets during minor GCs to avoid scenarios
164 * like the following which could result in long sync pauses:
165 *
166 * 1. We start a major GC, all thread stacks are added to the mark queue.
167 * 2. The concurrent mark thread starts.
168  * 3. The mutator is allowed to resume. One mutator thread T is scheduled and
169  *    pushes its stack to its local update remembered set.
170 * 4. The mark thread eventually encounters the mutator thread's stack but
171 * sees that it has already been marked; skips it.
172 * 5. Thread T continues running but does not push enough to its update
173 * remembered set to require a flush.
174  * 6. Eventually the mark thread finishes marking and requests a final sync.
175 * 7. The thread T flushes its update remembered set.
176 * 8. We find that a large fraction of the heap (namely the subset that is
177 * only reachable from the thread T's stack) needs to be marked, incurring
178 * a large sync pause
179 *
180 * We avoid this by periodically (during minor GC) forcing a flush of the
181 * update remembered set.
182 *
183 * A better (but more complex) approach that would be worthwhile trying in the
184  * future would be to instead do the following:
185 *
186 * 1. Concurrent mark phase is on-going
187 * 2. Mark thread runs out of things to mark
188 * 3. Mark thread sends a signal to capabilities requesting that they send
189 * their update remembered sets without suspending their execution
190 * 4. The mark thread marks everything it was sent; runs out of things to mark
191 * 5. Mark thread initiates a sync
192 * 6. Capabilities send their final update remembered sets and suspend execution
193  * 7. Mark thread marks everything it was sent
194  * 8. Mark thread allows capabilities to resume.
195 *
196 * However, this is obviously a fair amount of complexity and so far the
197 * periodic eager flushing approach has been sufficient.
198 *
199 *
200 * Note [Concurrent read barrier on deRefWeak#]
201 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
202 *
203 * In general the non-moving GC assumes that all pointers reachable from a
204 * marked object are themselves marked (or in the mark queue). However,
205 * weak pointers are an obvious exception to this rule. In particular,
206  * deRefWeak# allows the mutator to turn a weak reference into a strong
207 * reference. This interacts badly with concurrent collection. For
208 * instance, consider this program:
209 *
210 * f :: a -> b -> IO b
211 * f k v = do
212 * -- assume that k and v are the only references to the
213 * -- closures to which they refer.
214 * weak <- mkWeakPtr k v Nothing
215 *
216 * -- N.B. k is now technically dead since the only reference to it is
217 * -- weak, but we've not yet had a chance to tombstone the WeakPtr
218 * -- (which will happen in the course of major GC).
219 * performMajorGC
220 * -- Now we are running concurrently with the mark...
221  *
222 * Just x <- deRefWeak weak
223  * -- We have now introduced a reference to `v`, which will not be
224  * -- marked: the only reference to `v` at the time the snapshot was
225  * -- taken was via a WeakPtr.
226 * return x
227 *
228 */
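/* Schematically, the mutator-side write barrier described in
 * Note [Update remembered set] amounts to pushing the about-to-be-overwritten
 * pointer before performing the store. The snippet below is an illustrative
 * sketch only (the exact code lives in the dirty_* functions and in the
 * generated Cmm, not here); `mv` stands for some StgMutVar being mutated:
 *
 *     if (nonmoving_write_barrier_enabled) {
 *         // Record the old value so that the concurrent mark still sees it,
 *         // preserving the snapshot invariant.
 *         updateRemembSetPushClosure(cap, (StgClosure *) mv->var);
 *     }
 *     mv->var = new_value;   // only now perform the actual mutation
 */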
229 bdescr *upd_rem_set_block_list = NULL;
230 #if defined(THREADED_RTS)
231 static Mutex upd_rem_set_lock;
232
233 /* Used during the mark/sweep phase transition to track how many capabilities
234 * have pushed their update remembered sets. Protected by upd_rem_set_lock.
235 */
236 static volatile StgWord upd_rem_set_flush_count = 0;
237
238 /* Signaled by each capability when it has flushed its update remembered set */
239 static Condition upd_rem_set_flushed_cond;
240 #endif
241
242 /* Indicates to mutators that the write barrier must be respected. Set while
243 * concurrent mark is running.
244 */
245 StgWord nonmoving_write_barrier_enabled = false;
246
247 /* Used to provide the current mark queue to the young generation
248 * collector for scavenging.
249 */
250 MarkQueue *current_mark_queue = NULL;
251
252 /* Initialise update remembered set data structures */
253 void nonmovingMarkInitUpdRemSet() {
254 #if defined(THREADED_RTS)
255 initMutex(&upd_rem_set_lock);
256 initCondition(&upd_rem_set_flushed_cond);
257 initMutex(&nonmoving_large_objects_mutex);
258 #endif
259 }
260
261 #if defined(THREADED_RTS) && defined(DEBUG)
262 static uint32_t markQueueLength(MarkQueue *q);
263 #endif
264 static void init_mark_queue_(MarkQueue *queue);
265
266 /* Transfers the given capability's update-remembered set to the global
267 * remembered set.
268 *
269 * Really the argument type should be UpdRemSet* but this would be rather
270 * inconvenient without polymorphism.
271 */
272 void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
273 {
274 if (markQueueIsEmpty(rset)) return;
275
276 // find the tail of the queue
277 bdescr *start = rset->blocks;
278 bdescr *end = start;
279 while (end->link != NULL)
280 end = end->link;
281
282 // add the blocks to the global remembered set
283 ACQUIRE_LOCK(&upd_rem_set_lock);
284 end->link = upd_rem_set_block_list;
285 upd_rem_set_block_list = start;
286 RELEASE_LOCK(&upd_rem_set_lock);
287
288 // Reset remembered set
289 ACQUIRE_SM_LOCK;
290 init_mark_queue_(rset);
291 rset->is_upd_rem_set = true;
292 RELEASE_SM_LOCK;
293 }
294
295 #if defined(THREADED_RTS)
296 /* Called by capabilities to flush their update remembered sets when
297 * synchronising with the non-moving collector as it transitions from mark to
298 * sweep phase.
299 */
300 void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
301 {
302 debugTrace(DEBUG_nonmoving_gc,
303 "Capability %d flushing update remembered set: %d",
304 cap->no, markQueueLength(&cap->upd_rem_set.queue));
305 traceConcUpdRemSetFlush(cap);
306 nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue);
307 atomic_inc(&upd_rem_set_flush_count, 1);
308 signalCondition(&upd_rem_set_flushed_cond);
309     // After this, mutation will remain suspended until nonmovingFinishFlush
310 // releases its capabilities.
311 }
312
313 /* Request that all capabilities flush their update remembered sets and suspend
314  * execution until further notice.
315 */
316 void nonmovingBeginFlush(Task *task)
317 {
318 debugTrace(DEBUG_nonmoving_gc, "Starting update remembered set flush...");
319 traceConcSyncBegin();
320 upd_rem_set_flush_count = 0;
321 stat_startNonmovingGcSync();
322 stopAllCapabilitiesWith(NULL, task, SYNC_FLUSH_UPD_REM_SET);
323
324 // XXX: We may have been given a capability via releaseCapability (i.e. a
325 // task suspended due to a foreign call) in which case our requestSync
326 // logic won't have been hit. Make sure that everyone so far has flushed.
327 // Ideally we want to mark asynchronously with syncing.
328 for (uint32_t i = 0; i < n_capabilities; i++) {
329 nonmovingFlushCapUpdRemSetBlocks(capabilities[i]);
330 }
331 }
332
333 /* Wait until a capability has flushed its update remembered set. Returns true
334 * if all capabilities have flushed.
335 */
336 bool nonmovingWaitForFlush()
337 {
338 ACQUIRE_LOCK(&upd_rem_set_lock);
339 debugTrace(DEBUG_nonmoving_gc, "Flush count %d", upd_rem_set_flush_count);
340 bool finished = upd_rem_set_flush_count == n_capabilities;
341 if (!finished) {
342 waitCondition(&upd_rem_set_flushed_cond, &upd_rem_set_lock);
343 }
344 RELEASE_LOCK(&upd_rem_set_lock);
345 return finished;
346 }
347
348 /* Note [Unintentional marking in resurrectThreads]
349 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
350 * In both moving and non-moving collectors threads found to be unreachable are
351 * evacuated/marked and then resurrected with resurrectThreads. resurrectThreads
352 * raises an exception in the unreachable thread via raiseAsync, which does
353  * mutations on the heap. These mutations add entries to the UpdRemSet of the
354 * thread's capability. Here's an example backtrace where this happens:
355 *
356 * #0 updateRemembSetPushClosure
357 * #1 0x000000000072b363 in dirty_TVAR
358 * #2 0x00000000007162e5 in remove_watch_queue_entries_for_trec
359 * #3 0x0000000000717098 in stmAbortTransaction
360 * #4 0x000000000070c6eb in raiseAsync
361 * #5 0x000000000070b473 in throwToSingleThreaded__
362 * #6 0x000000000070b4ab in throwToSingleThreaded
363 * #7 0x00000000006fce82 in resurrectThreads
364 * #8 0x00000000007215db in nonmovingMark_
365 * #9 0x0000000000721438 in nonmovingConcurrentMark
366 * #10 0x00007f1ee81cd6db in start_thread
367 * #11 0x00007f1ee850688f in clone
368 *
369 * However we don't really want to run write barriers when calling
370 * resurrectThreads here, because we're in a GC pause, and overwritten values
371 * are definitely gone forever (as opposed to being inserted in a marked object
372 * or kept in registers and used later).
373 *
374  * When this happens, if we don't reset the UpdRemSets, then in the next mark we
375  * see the objects that were added to UpdRemSets by the previous mark's
376  * resurrectThreads, and mark those. This keeps unreachable objects alive, and it
377  * affects weak finalization and thread resurrection (both of which rely on
378  * things becoming unreachable). As an example, stm048 fails when
379 * we get this wrong, because when we do raiseAsync on a thread that was blocked
380 * on an STM transaction we mutate a TVAR_WATCH_QUEUE, which has a reference to
381 * the TSO that was running the STM transaction. If the TSO becomes unreachable
382 * again in the next GC we don't realize this, because it was added to an
383 * UpdRemSet in the previous GC's mark phase, because of raiseAsync.
384 *
385 * To fix this we clear all UpdRemSets in nonmovingFinishFlush, right before
386 * releasing capabilities. This is somewhat inefficient (we allow adding objects
387 * to UpdRemSets, only to later reset them), but the only case where we add to
388 * UpdRemSets during mark is resurrectThreads, and I don't think we do so many
389  * resurrections in a thread that we fill UpdRemSets and allocate new blocks. So
390 * pushing an UpdRemSet in this case is really fast, and resetting is even
391 * faster (we just update a pointer).
392 *
393 * TODO (osa): What if we actually marked UpdRemSets in this case, in the mark
394 * loop? Would that work? Or what would break?
395 */
396
397 /* Notify capabilities that the synchronisation is finished; they may resume
398 * execution.
399 */
400 void nonmovingFinishFlush(Task *task)
401 {
402 // See Note [Unintentional marking in resurrectThreads]
403 for (uint32_t i = 0; i < n_capabilities; i++) {
404 reset_upd_rem_set(&capabilities[i]->upd_rem_set);
405 }
406 // Also reset upd_rem_set_block_list in case some of the UpdRemSets were
407 // filled and we flushed them.
408 freeChain_lock(upd_rem_set_block_list);
409 upd_rem_set_block_list = NULL;
410
411 debugTrace(DEBUG_nonmoving_gc, "Finished update remembered set flush...");
412 traceConcSyncEnd();
413 stat_endNonmovingGcSync();
414 releaseAllCapabilities(n_capabilities, NULL, task);
415 }
416 #endif
417
418 /*********************************************************
419 * Pushing to either the mark queue or remembered set
420 *********************************************************/
421
422 STATIC_INLINE void
423 push (MarkQueue *q, const MarkQueueEnt *ent)
424 {
425 // Are we at the end of the block?
426 if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
427 // Yes, this block is full.
428 if (q->is_upd_rem_set) {
429 nonmovingAddUpdRemSetBlocks(q);
430 } else {
431 // allocate a fresh block.
432 ACQUIRE_SM_LOCK;
433 bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
434 bd->link = q->blocks;
435 q->blocks = bd;
436 q->top = (MarkQueueBlock *) bd->start;
437 q->top->head = 0;
438 RELEASE_SM_LOCK;
439 }
440 }
441
442 q->top->entries[q->top->head] = *ent;
443 q->top->head++;
444 }
445
446 /* A variant of push to be used by the minor GC when it encounters a reference
447 * to an object in the non-moving heap. In contrast to the other push
448 * operations this uses the gc_alloc_block_sync spinlock instead of the
449 * SM_LOCK to allocate new blocks in the event that the mark queue is full.
450 */
451 void
452 markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
453 {
454 if (!check_in_nonmoving_heap(p)) {
455 return;
456 }
457
458 /* We should not make it here if we are doing a deadlock detect GC.
459 * See Note [Deadlock detection under nonmoving collector].
460      * This is actually no longer true due to the call in nonmovingScavengeOne
461 * introduced due to Note [Dirty flags in the non-moving collector]
462 * (see NonMoving.c).
463 */
464 //ASSERT(!deadlock_detect_gc);
465
466 // Are we at the end of the block?
467 if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
468 // Yes, this block is full.
469 // allocate a fresh block.
470 ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
471 bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
472 bd->link = q->blocks;
473 q->blocks = bd;
474 q->top = (MarkQueueBlock *) bd->start;
475 q->top->head = 0;
476 RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
477 }
478
479 MarkQueueEnt ent = {
480 .mark_closure = {
481 .p = TAG_CLOSURE(MARK_CLOSURE, UNTAG_CLOSURE(p)),
482 .origin = NULL,
483 }
484 };
485 q->top->entries[q->top->head] = ent;
486 q->top->head++;
487 }
488
489 static inline
490 void push_closure (MarkQueue *q,
491 StgClosure *p,
492 StgClosure **origin)
493 {
494 #if defined(DEBUG)
495 ASSERT(!HEAP_ALLOCED_GC(p) || (Bdescr((StgPtr) p)->gen == oldest_gen));
496 ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
497 // Commenting out: too slow
498 // if (RtsFlags.DebugFlags.sanity) {
499 // assert_in_nonmoving_heap((P_)p);
500 // if (origin)
501 // assert_in_nonmoving_heap((P_)origin);
502 // }
503 #endif
504
505 // This must be true as origin points to a pointer and therefore must be
506 // word-aligned. However, we check this as otherwise we would confuse this
507 // with a mark_array entry
508 ASSERT(((uintptr_t) origin & 0x3) == 0);
509
510 MarkQueueEnt ent = {
511 .mark_closure = {
512 .p = TAG_CLOSURE(MARK_CLOSURE, UNTAG_CLOSURE(p)),
513 .origin = origin,
514 }
515 };
516 push(q, &ent);
517 }
518
519 static
520 void push_array (MarkQueue *q,
521 const StgMutArrPtrs *array,
522 StgWord start_index)
523 {
524 // TODO: Push this into callers where they already have the Bdescr
525 if (HEAP_ALLOCED_GC(array) && (Bdescr((StgPtr) array)->gen != oldest_gen))
526 return;
527
528 MarkQueueEnt ent = {
529 .mark_array = {
530 .array = (const StgMutArrPtrs *) TAG_CLOSURE(MARK_ARRAY, UNTAG_CLOSURE((StgClosure *) array)),
531 .start_index = start_index,
532 }
533 };
534 push(q, &ent);
535 }
536
537 static
538 void push_thunk_srt (MarkQueue *q, const StgInfoTable *info)
539 {
540 const StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(info);
541 if (thunk_info->i.srt) {
542 push_closure(q, (StgClosure*)GET_SRT(thunk_info), NULL);
543 }
544 }
545
546 static
547 void push_fun_srt (MarkQueue *q, const StgInfoTable *info)
548 {
549 const StgFunInfoTable *fun_info = itbl_to_fun_itbl(info);
550 if (fun_info->i.srt) {
551 push_closure(q, (StgClosure*)GET_FUN_SRT(fun_info), NULL);
552 }
553 }
554
555 /*********************************************************
556 * Pushing to the update remembered set
557 *
558 * upd_rem_set_push_* functions are directly called by
559 * mutators and need to check whether the value is in
560 * non-moving heap.
561 *********************************************************/
562
563 // Check if the object is traced by the non-moving collector. This holds in any
564 // of the following cases:
565 //
566 // - Object is in the non-moving heap
567 // - Object is a large object (BF_LARGE) marked as BF_NONMOVING
568 // - Object is static (HEAP_ALLOCED_GC(obj) == false)
569 //
570 static
571 bool check_in_nonmoving_heap(StgClosure *p) {
572 if (HEAP_ALLOCED_GC(p)) {
573 // This works for both large and small objects:
574 return Bdescr((P_)p)->flags & BF_NONMOVING;
575 } else {
576 return true; // a static object
577 }
578 }
579
580 /* Push the free variables of a (now-evaluated) thunk to the
581 * update remembered set.
582 */
583 inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk)
584 {
585 const StgInfoTable *info;
586 do {
587 info = *(StgInfoTable* volatile*) &thunk->header.info;
588 } while (info == &stg_WHITEHOLE_info);
589
590 const StgThunkInfoTable *thunk_info = THUNK_INFO_PTR_TO_STRUCT(info);
591 updateRemembSetPushThunkEager(cap, thunk_info, thunk);
592 }
593
594 /* Push the free variables of a thunk to the update remembered set.
595 * This is called by the thunk update code (e.g. updateWithIndirection) before
596 * we update the indirectee to ensure that the thunk's free variables remain
597 * visible to the concurrent collector.
598 *
599  * See Note [Update remembered set].
600 */
601 void updateRemembSetPushThunkEager(Capability *cap,
602 const StgThunkInfoTable *info,
603 StgThunk *thunk)
604 {
605 /* N.B. info->i.type mustn't be WHITEHOLE */
606 MarkQueue *queue = &cap->upd_rem_set.queue;
607 switch (info->i.type) {
608 case THUNK:
609 case THUNK_1_0:
610 case THUNK_0_1:
611 case THUNK_2_0:
612 case THUNK_1_1:
613 case THUNK_0_2:
614 {
615 push_thunk_srt(queue, &info->i);
616
617 for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) {
618 if (check_in_nonmoving_heap(thunk->payload[i])) {
619 // Don't bother to push origin; it makes the barrier needlessly
620 // expensive with little benefit.
621 push_closure(queue, thunk->payload[i], NULL);
622 }
623 }
624 break;
625 }
626 case AP:
627 {
628 StgAP *ap = (StgAP *) thunk;
629 if (check_in_nonmoving_heap(ap->fun)) {
630 push_closure(queue, ap->fun, NULL);
631 }
632 mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
633 break;
634 }
635 case THUNK_SELECTOR:
636 case BLACKHOLE:
637 // TODO: This is right, right?
638 break;
639 // The selector optimization performed by the nonmoving mark may have
640 // overwritten a thunk which we are updating with an indirection.
641 case IND:
642 {
643 StgInd *ind = (StgInd *) thunk;
644 if (check_in_nonmoving_heap(ind->indirectee)) {
645 push_closure(queue, ind->indirectee, NULL);
646 }
647 break;
648 }
649 default:
650 barf("updateRemembSetPushThunk: invalid thunk pushed: p=%p, type=%d",
651 thunk, info->i.type);
652 }
653 }
654
655 void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p)
656 {
657 updateRemembSetPushThunk(regTableToCapability(reg), p);
658 }
659
660 inline void updateRemembSetPushClosure(Capability *cap, StgClosure *p)
661 {
662 if (check_in_nonmoving_heap(p)) {
663 MarkQueue *queue = &cap->upd_rem_set.queue;
664 push_closure(queue, p, NULL);
665 }
666 }
667
668 void updateRemembSetPushClosure_(StgRegTable *reg, struct StgClosure_ *p)
669 {
670 updateRemembSetPushClosure(regTableToCapability(reg), p);
671 }
672
673 STATIC_INLINE bool needs_upd_rem_set_mark(StgClosure *p)
674 {
675 // TODO: Deduplicate with mark_closure
676 bdescr *bd = Bdescr((StgPtr) p);
677 if (bd->gen != oldest_gen) {
678 return false;
679 } else if (bd->flags & BF_LARGE) {
680 if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
681 return false;
682 } else {
683 return ! (bd->flags & BF_MARKED);
684 }
685 } else {
686 struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
687 nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
688 return nonmovingGetMark(seg, block_idx) != nonmovingMarkEpoch;
689 }
690 }
691
692 /* Set the mark bit; only to be called *after* we have fully marked the closure */
693 STATIC_INLINE void finish_upd_rem_set_mark(StgClosure *p)
694 {
695 bdescr *bd = Bdescr((StgPtr) p);
696 if (bd->flags & BF_LARGE) {
697 // Someone else may have already marked it.
698 ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
699 if (! (bd->flags & BF_MARKED)) {
700 bd->flags |= BF_MARKED;
701 dbl_link_remove(bd, &nonmoving_large_objects);
702 dbl_link_onto(bd, &nonmoving_marked_large_objects);
703 n_nonmoving_large_blocks -= bd->blocks;
704 n_nonmoving_marked_large_blocks += bd->blocks;
705 }
706 RELEASE_LOCK(&nonmoving_large_objects_mutex);
707 } else {
708 struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
709 nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
710 nonmovingSetMark(seg, block_idx);
711 }
712 }
713
714 void updateRemembSetPushTSO(Capability *cap, StgTSO *tso)
715 {
716 if (needs_upd_rem_set_mark((StgClosure *) tso)) {
717 debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: TSO %p", tso);
718 mark_tso(&cap->upd_rem_set.queue, tso);
719 finish_upd_rem_set_mark((StgClosure *) tso);
720 }
721 }
722
723 void updateRemembSetPushStack(Capability *cap, StgStack *stack)
724 {
725 // N.B. caller responsible for checking nonmoving_write_barrier_enabled
726 if (needs_upd_rem_set_mark((StgClosure *) stack)) {
727 StgWord8 marking = stack->marking;
728 // See Note [StgStack dirtiness flags and concurrent marking]
729 if (cas_word8(&stack->marking, marking, nonmovingMarkEpoch)
730 != nonmovingMarkEpoch) {
731 // We have claimed the right to mark the stack.
732 debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: STACK %p", stack->sp);
733 mark_stack(&cap->upd_rem_set.queue, stack);
734 finish_upd_rem_set_mark((StgClosure *) stack);
735 return;
736 } else {
737 // The concurrent GC has claimed the right to mark the stack.
738 // Wait until it finishes marking before proceeding with
739 // mutation.
740 while (needs_upd_rem_set_mark((StgClosure *) stack))
741 #if defined(PARALLEL_GC)
742 busy_wait_nop(); // TODO: Spinning here is unfortunate
743 #else
744 ;
745 #endif
746 return;
747 }
748 }
749 }
750
751 /*********************************************************
752 * Pushing to the mark queue
753 *********************************************************/
754
755 void markQueuePush (MarkQueue *q, const MarkQueueEnt *ent)
756 {
757 push(q, ent);
758 }
759
760 void markQueuePushClosure (MarkQueue *q,
761 StgClosure *p,
762 StgClosure **origin)
763 {
764 // TODO: Push this into callers where they already have the Bdescr
765 if (check_in_nonmoving_heap(p)) {
766 push_closure(q, p, origin);
767 }
768 }
769
770 /* TODO: Do we really never want to specify the origin here? */
771 void markQueueAddRoot (MarkQueue* q, StgClosure** root)
772 {
773 markQueuePushClosure(q, *root, NULL);
774 }
775
776 /* Push a closure to the mark queue without origin information */
777 void markQueuePushClosure_ (MarkQueue *q, StgClosure *p)
778 {
779 markQueuePushClosure(q, p, NULL);
780 }
781
782 void markQueuePushFunSrt (MarkQueue *q, const StgInfoTable *info)
783 {
784 push_fun_srt(q, info);
785 }
786
787 void markQueuePushThunkSrt (MarkQueue *q, const StgInfoTable *info)
788 {
789 push_thunk_srt(q, info);
790 }
791
792 void markQueuePushArray (MarkQueue *q,
793 const StgMutArrPtrs *array,
794 StgWord start_index)
795 {
796 push_array(q, array, start_index);
797 }
798
799 /*********************************************************
800 * Popping from the mark queue
801 *********************************************************/
802
803 // Returns an invalid MarkQueueEnt if the queue is empty.
804 static MarkQueueEnt markQueuePop_ (MarkQueue *q)
805 {
806 MarkQueueBlock *top;
807
808 again:
809 top = q->top;
810
811 // Are we at the beginning of the block?
812 if (top->head == 0) {
813 // Is this the first block of the queue?
814 if (q->blocks->link == NULL) {
815 // Yes, therefore queue is empty...
816 MarkQueueEnt none = { .null_entry = { .p = NULL } };
817 return none;
818 } else {
819 // No, unwind to the previous block and try popping again...
820 bdescr *old_block = q->blocks;
821 q->blocks = old_block->link;
822 q->top = (MarkQueueBlock*)q->blocks->start;
823 ACQUIRE_SM_LOCK;
824 freeGroup(old_block); // TODO: hold on to a block to avoid repeated allocation/deallocation?
825 RELEASE_SM_LOCK;
826 goto again;
827 }
828 }
829
830 top->head--;
831 MarkQueueEnt ent = top->entries[top->head];
832 return ent;
833 }
834
835 static MarkQueueEnt markQueuePop (MarkQueue *q)
836 {
837 #if MARK_PREFETCH_QUEUE_DEPTH == 0
838 return markQueuePop_(q);
839 #else
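    // Prefetching path: each entry popped from the underlying queue is first
    // placed into a small ring buffer (prefetch_queue) and its info table and
    // block descriptor are prefetched. An entry is only returned once the ring
    // has wrapped around, by which point the prefetches have hopefully
    // completed.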
840 unsigned int i = q->prefetch_head;
841 while (nonmovingMarkQueueEntryType(&q->prefetch_queue[i]) == NULL_ENTRY) {
842 MarkQueueEnt new = markQueuePop_(q);
843 if (nonmovingMarkQueueEntryType(&new) == NULL_ENTRY) {
844 // Mark queue is empty; look for any valid entries in the prefetch
845 // queue
846 for (unsigned int j = (i+1) % MARK_PREFETCH_QUEUE_DEPTH;
847 j != i;
848 j = (j+1) % MARK_PREFETCH_QUEUE_DEPTH)
849 {
850 if (nonmovingMarkQueueEntryType(&q->prefetch_queue[j]) != NULL_ENTRY) {
851 i = j;
852 goto done;
853 }
854 }
855 return new;
856 }
857
858 // The entry may not be a MARK_CLOSURE but it doesn't matter, our
859 // MarkQueueEnt encoding always places the pointer to the object to be
860 // marked first.
861 prefetchForRead(&new.mark_closure.p->header.info);
862 prefetchForRead(Bdescr((StgPtr) new.mark_closure.p));
863 q->prefetch_queue[i] = new;
864 i = (i + 1) % MARK_PREFETCH_QUEUE_DEPTH;
865 }
866
867 done:
868 ;
869 MarkQueueEnt ret = q->prefetch_queue[i];
870 q->prefetch_queue[i].null_entry.p = NULL;
871 q->prefetch_head = i;
872 return ret;
873 #endif
874 }
875
876 /*********************************************************
877 * Creating and destroying MarkQueues and UpdRemSets
878 *********************************************************/
879
880 /* Must hold sm_mutex. */
881 static void init_mark_queue_ (MarkQueue *queue)
882 {
883 bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
884 queue->blocks = bd;
885 queue->top = (MarkQueueBlock *) bd->start;
886 queue->top->head = 0;
887 #if MARK_PREFETCH_QUEUE_DEPTH > 0
888 memset(&queue->prefetch_queue, 0, sizeof(queue->prefetch_queue));
889 queue->prefetch_head = 0;
890 #endif
891 }
892
893 /* Must hold sm_mutex. */
894 void initMarkQueue (MarkQueue *queue)
895 {
896 init_mark_queue_(queue);
897 queue->is_upd_rem_set = false;
898 }
899
900 /* Must hold sm_mutex. */
901 void init_upd_rem_set (UpdRemSet *rset)
902 {
903 init_mark_queue_(&rset->queue);
904 rset->queue.is_upd_rem_set = true;
905 }
906
907 void reset_upd_rem_set (UpdRemSet *rset)
908 {
909     // UpdRemSets always have exactly one block for the mark queue. This
910     // assertion is here so that we update this code if we change that.
911 ASSERT(rset->queue.blocks->link == NULL);
912 rset->queue.top->head = 0;
913 }
914
915 void freeMarkQueue (MarkQueue *queue)
916 {
917 freeChain_lock(queue->blocks);
918 }
919
920 #if defined(THREADED_RTS) && defined(DEBUG)
921 static uint32_t
922 markQueueLength (MarkQueue *q)
923 {
924 uint32_t n = 0;
925 for (bdescr *block = q->blocks; block; block = block->link) {
926 MarkQueueBlock *queue = (MarkQueueBlock*)block->start;
927 n += queue->head;
928 }
929 return n;
930 }
931 #endif
932
933
934 /*********************************************************
935 * Marking
936 *********************************************************/
937
938 /*
939 * N.B. Mutation of TRecHeaders is completely unprotected by any write
940 * barrier. Consequently it's quite important that we deeply mark
941 * any outstanding transactions.
942 */
943 static void
944 mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
945 {
946 while (trec != NO_TREC) {
947 StgTRecChunk *chunk = trec->current_chunk;
948 markQueuePushClosure_(queue, (StgClosure *) trec);
949 markQueuePushClosure_(queue, (StgClosure *) chunk);
950 while (chunk != END_STM_CHUNK_LIST) {
951 for (StgWord i=0; i < chunk->next_entry_idx; i++) {
952 TRecEntry *ent = &chunk->entries[i];
953 markQueuePushClosure_(queue, (StgClosure *) ent->tvar);
954 markQueuePushClosure_(queue, ent->expected_value);
955 markQueuePushClosure_(queue, ent->new_value);
956 }
957 chunk = chunk->prev_chunk;
958 }
959 trec = trec->enclosing_trec;
960 }
961 }
962
963 static void
964 mark_tso (MarkQueue *queue, StgTSO *tso)
965 {
966 // TODO: Clear dirty if contains only old gen objects
967
968 if (tso->bound != NULL) {
969 markQueuePushClosure_(queue, (StgClosure *) tso->bound->tso);
970 }
971
972 markQueuePushClosure_(queue, (StgClosure *) tso->blocked_exceptions);
973 markQueuePushClosure_(queue, (StgClosure *) tso->bq);
974 mark_trec_header(queue, tso->trec);
975 markQueuePushClosure_(queue, (StgClosure *) tso->stackobj);
976 markQueuePushClosure_(queue, (StgClosure *) tso->_link);
977 if ( tso->why_blocked == BlockedOnMVar
978 || tso->why_blocked == BlockedOnMVarRead
979 || tso->why_blocked == BlockedOnBlackHole
980 || tso->why_blocked == BlockedOnMsgThrowTo
981 || tso->why_blocked == NotBlocked
982 ) {
983 markQueuePushClosure_(queue, tso->block_info.closure);
984 }
985 }
986
987 static void
988 do_push_closure (StgClosure **p, void *user)
989 {
990 MarkQueue *queue = (MarkQueue *) user;
991 // TODO: Origin? need reference to containing closure
992 markQueuePushClosure_(queue, *p);
993 }
994
995 static void
996 mark_large_bitmap (MarkQueue *queue,
997 StgClosure **p,
998 StgLargeBitmap *large_bitmap,
999 StgWord size)
1000 {
1001 walk_large_bitmap(do_push_closure, p, large_bitmap, size, queue);
1002 }
1003
1004 static void
1005 mark_small_bitmap (MarkQueue *queue, StgClosure **p, StgWord size, StgWord bitmap)
1006 {
1007 while (size > 0) {
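        // In RTS layout bitmaps a cleared (0) bit denotes a pointer field, so
        // it must be pushed to the mark queue.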
1008 if ((bitmap & 1) == 0) {
1009 // TODO: Origin?
1010 markQueuePushClosure(queue, *p, NULL);
1011 }
1012 p++;
1013 bitmap = bitmap >> 1;
1014 size--;
1015 }
1016 }
1017
1018 static GNUC_ATTR_HOT
1019 void mark_PAP_payload (MarkQueue *queue,
1020 StgClosure *fun,
1021 StgClosure **payload,
1022 StgWord size)
1023 {
1024 const StgFunInfoTable *fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(fun));
1025 ASSERT(fun_info->i.type != PAP);
1026 StgPtr p = (StgPtr) payload;
1027
1028 StgWord bitmap;
1029 switch (fun_info->f.fun_type) {
1030 case ARG_GEN:
1031 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1032 goto small_bitmap;
1033 case ARG_GEN_BIG:
1034 mark_large_bitmap(queue, payload, GET_FUN_LARGE_BITMAP(fun_info), size);
1035 break;
1036 case ARG_BCO:
1037 mark_large_bitmap(queue, payload, BCO_BITMAP(fun), size);
1038 break;
1039 default:
1040 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1041 small_bitmap:
1042 mark_small_bitmap(queue, (StgClosure **) p, size, bitmap);
1043 break;
1044 }
1045 }
1046
1047 /* Helper for mark_stack; returns next stack frame. */
1048 static StgPtr
1049 mark_arg_block (MarkQueue *queue, const StgFunInfoTable *fun_info, StgClosure **args)
1050 {
1051 StgWord bitmap, size;
1052
1053 StgPtr p = (StgPtr)args;
1054 switch (fun_info->f.fun_type) {
1055 case ARG_GEN:
1056 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1057 size = BITMAP_SIZE(fun_info->f.b.bitmap);
1058 goto small_bitmap;
1059 case ARG_GEN_BIG:
1060 size = GET_FUN_LARGE_BITMAP(fun_info)->size;
1061 mark_large_bitmap(queue, (StgClosure**)p, GET_FUN_LARGE_BITMAP(fun_info), size);
1062 p += size;
1063 break;
1064 default:
1065 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1066 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1067 small_bitmap:
1068 mark_small_bitmap(queue, (StgClosure**)p, size, bitmap);
1069 p += size;
1070 break;
1071 }
1072 return p;
1073 }
1074
1075 static GNUC_ATTR_HOT void
1076 mark_stack_ (MarkQueue *queue, StgPtr sp, StgPtr spBottom)
1077 {
1078 ASSERT(sp <= spBottom);
1079
1080 while (sp < spBottom) {
1081 const StgRetInfoTable *info = get_ret_itbl((StgClosure *)sp);
1082 switch (info->i.type) {
1083 case UPDATE_FRAME:
1084 {
1085 // See Note [upd-black-hole] in rts/Scav.c
1086 StgUpdateFrame *frame = (StgUpdateFrame *) sp;
1087 markQueuePushClosure_(queue, frame->updatee);
1088 sp += sizeofW(StgUpdateFrame);
1089 continue;
1090 }
1091
1092 // small bitmap (< 32 entries, or 64 on a 64-bit machine)
1093 case CATCH_STM_FRAME:
1094 case CATCH_RETRY_FRAME:
1095 case ATOMICALLY_FRAME:
1096 case UNDERFLOW_FRAME:
1097 case STOP_FRAME:
1098 case CATCH_FRAME:
1099 case RET_SMALL:
1100 {
1101 StgWord bitmap = BITMAP_BITS(info->i.layout.bitmap);
1102 StgWord size = BITMAP_SIZE(info->i.layout.bitmap);
1103 // NOTE: the payload starts immediately after the info-ptr, we
1104 // don't have an StgHeader in the same sense as a heap closure.
1105 sp++;
1106 mark_small_bitmap(queue, (StgClosure **) sp, size, bitmap);
1107 sp += size;
1108 }
1109 follow_srt:
1110 if (info->i.srt) {
1111 markQueuePushClosure_(queue, (StgClosure*)GET_SRT(info));
1112 }
1113 continue;
1114
1115 case RET_BCO: {
1116 sp++;
1117 markQueuePushClosure_(queue, *(StgClosure**)sp);
1118 StgBCO *bco = (StgBCO *)*sp;
1119 sp++;
1120 StgWord size = BCO_BITMAP_SIZE(bco);
1121 mark_large_bitmap(queue, (StgClosure **) sp, BCO_BITMAP(bco), size);
1122 sp += size;
1123 continue;
1124 }
1125
1126 // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1127 case RET_BIG:
1128 {
1129 StgWord size;
1130
1131 size = GET_LARGE_BITMAP(&info->i)->size;
1132 sp++;
1133 mark_large_bitmap(queue, (StgClosure **) sp, GET_LARGE_BITMAP(&info->i), size);
1134 sp += size;
1135 // and don't forget to follow the SRT
1136 goto follow_srt;
1137 }
1138
1139 case RET_FUN:
1140 {
1141 StgRetFun *ret_fun = (StgRetFun *)sp;
1142 const StgFunInfoTable *fun_info;
1143
1144 markQueuePushClosure_(queue, ret_fun->fun);
1145 fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
1146 sp = mark_arg_block(queue, fun_info, ret_fun->payload);
1147 goto follow_srt;
1148 }
1149
1150 default:
1151 barf("mark_stack: weird activation record found on stack: %d", (int)(info->i.type));
1152 }
1153 }
1154 }
1155
1156 static GNUC_ATTR_HOT void
1157 mark_stack (MarkQueue *queue, StgStack *stack)
1158 {
1159 // TODO: Clear dirty if contains only old gen objects
1160
1161 mark_stack_(queue, stack->sp, stack->stack + stack->stack_size);
1162 }
1163
1164 /* See Note [Static objects under the nonmoving collector].
1165 *
1166 * Returns true if the object needs to be marked.
1167 */
1168 static bool
1169 bump_static_flag(StgClosure **link_field, StgClosure *q STG_UNUSED)
1170 {
1171 while (1) {
1172 StgWord link = (StgWord) *link_field;
1173 StgWord new = (link & ~STATIC_BITS) | static_flag;
1174 if ((link & STATIC_BITS) == static_flag)
1175 return false;
1176 else if (cas((StgVolatilePtr) link_field, link, new) == link) {
1177 return true;
1178 }
1179 }
1180 }
1181
1182 /* N.B. p0 may be tagged */
1183 static GNUC_ATTR_HOT void
1184 mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
1185 {
1186 StgClosure *p = (StgClosure*)p0;
1187
1188 try_again:
1189 ;
1190 bdescr *bd = NULL;
1191 StgClosure *p_next = NULL;
1192 StgWord tag = GET_CLOSURE_TAG(p);
1193 p = UNTAG_CLOSURE(p);
1194
1195 # define PUSH_FIELD(obj, field) \
1196 markQueuePushClosure(queue, \
1197 (StgClosure *) (obj)->field, \
1198 (StgClosure **) &(obj)->field)
1199
1200 if (!HEAP_ALLOCED_GC(p)) {
1201 const StgInfoTable *info = get_itbl(p);
1202 StgHalfWord type = info->type;
1203
1204 if (type == CONSTR_0_1 || type == CONSTR_0_2 || type == CONSTR_NOCAF) {
1205 // no need to put these on the static linked list, they don't need
1206 // to be marked.
1207 return;
1208 }
1209
1210 switch (type) {
1211
1212 case THUNK_STATIC:
1213 if (info->srt != 0) {
1214 if (bump_static_flag(THUNK_STATIC_LINK((StgClosure *)p), p)) {
1215 markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above
1216 }
1217 }
1218 goto done;
1219
1220 case FUN_STATIC:
1221 if (info->srt != 0 || info->layout.payload.ptrs != 0) {
1222 if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) {
1223 markQueuePushFunSrt(queue, info); // TODO this function repeats the check above
1224
1225 // a FUN_STATIC can also be an SRT, so it may have pointer
1226 // fields. See Note [SRTs] in CmmBuildInfoTables, specifically
1227 // the [FUN] optimisation.
1228 // TODO (osa) I don't understand this comment
1229 for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
1230 PUSH_FIELD(p, payload[i]);
1231 }
1232 }
1233 }
1234 goto done;
1235
1236 case IND_STATIC:
1237 if (bump_static_flag(IND_STATIC_LINK((StgClosure *)p), p)) {
1238 PUSH_FIELD((StgInd *) p, indirectee);
1239 }
1240 goto done;
1241
1242 case CONSTR:
1243 case CONSTR_1_0:
1244 case CONSTR_2_0:
1245 case CONSTR_1_1:
1246 if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) {
1247 for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
1248 PUSH_FIELD(p, payload[i]);
1249 }
1250 }
1251 goto done;
1252
1253 case WHITEHOLE:
1254 while (*(StgInfoTable* volatile*) &p->header.info == &stg_WHITEHOLE_info);
1255 // busy_wait_nop(); // FIXME
1256 goto try_again;
1257
1258 default:
1259 barf("mark_closure(static): strange closure type %d", (int)(info->type));
1260 }
1261 }
1262
1263 bd = Bdescr((StgPtr) p);
1264
1265 if (bd->gen != oldest_gen) {
1266 // Here we have an object living outside of the non-moving heap. While
1267 // we likely evacuated nearly everything to the nonmoving heap during
1268 // preparation there are nevertheless a few ways in which we might trace
1269 // a reference into younger generations:
1270 //
1271 // * a mutable object might have been updated
1272 // * we might have aged an object
1273 goto done;
1274 }
1275
1276 ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info);
1277
1278 ASSERT(!IS_FORWARDING_PTR(p->header.info));
1279
1280 // N.B. only the first block of a compact region is guaranteed to carry
1281     // BF_NONMOVING; consequently we must separately check for BF_COMPACT.
1282 if (bd->flags & (BF_COMPACT | BF_NONMOVING)) {
1283
1284 if (bd->flags & BF_COMPACT) {
1285 StgCompactNFData *str = objectGetCompact((StgClosure*)p);
1286 bd = Bdescr((P_)str);
1287
1288 if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
1289 // Not in the snapshot
1290 return;
1291 }
1292
1293 if (! (bd->flags & BF_MARKED)) {
1294 dbl_link_remove(bd, &nonmoving_compact_objects);
1295 dbl_link_onto(bd, &nonmoving_marked_compact_objects);
1296 StgWord blocks = str->totalW / BLOCK_SIZE_W;
1297 n_nonmoving_compact_blocks -= blocks;
1298 n_nonmoving_marked_compact_blocks += blocks;
1299 bd->flags |= BF_MARKED;
1300 }
1301
1302 // N.B. the object being marked is in a compact region so by
1303 // definition there is no need to do any tracing here.
1304 goto done;
1305 } else if (bd->flags & BF_LARGE) {
1306 if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
1307 // Not in the snapshot
1308 goto done;
1309 }
1310 if (bd->flags & BF_MARKED) {
1311 goto done;
1312 }
1313 } else {
1314 struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
1315 nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
1316
1317 /* We don't mark blocks that,
1318 * - were not live at the time that the snapshot was taken, or
1319 * - we have already marked this cycle
1320 */
1321 uint8_t mark = nonmovingGetMark(seg, block_idx);
1322 /* Don't mark things we've already marked (since we may loop) */
1323 if (mark == nonmovingMarkEpoch)
1324 goto done;
1325
1326 StgClosure *snapshot_loc =
1327 (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
1328 if (p >= snapshot_loc && mark == 0) {
1329 /*
1330 * In this case we are looking at a block that wasn't allocated
1331 * at the time that the snapshot was taken. We mustn't trace
1332 * things above the allocation pointer that aren't marked since
1333 * they may not be valid objects.
1334 */
1335 goto done;
1336 }
1337 }
1338 }
1339
1340 // A pinned object that is still attached to a capability (because it's not
1341     // filled yet). No need to trace it: pinned objects can't contain pointers.
1342 else if (bd->flags & BF_PINNED) {
1343 #if defined(DEBUG)
1344 bool found_it = false;
1345 for (uint32_t i = 0; i < n_capabilities; ++i) {
1346 if (capabilities[i]->pinned_object_block == bd) {
1347 found_it = true;
1348 break;
1349 }
1350 }
1351 ASSERT(found_it);
1352 #endif
1353 return; // we don't update origin here! TODO(osa): explain this
1354 }
1355
1356 else {
1357 barf("Strange closure in nonmoving mark: %p", p);
1358 }
1359
1360 /////////////////////////////////////////////////////
1361 // Trace pointers
1362 /////////////////////////////////////////////////////
1363
1364 const StgInfoTable *info = get_itbl(p);
1365 switch (info->type) {
1366
1367 case MVAR_CLEAN:
1368 case MVAR_DIRTY: {
1369 StgMVar *mvar = (StgMVar *) p;
1370 PUSH_FIELD(mvar, head);
1371 PUSH_FIELD(mvar, tail);
1372 PUSH_FIELD(mvar, value);
1373 break;
1374 }
1375
1376 case TVAR: {
1377 StgTVar *tvar = ((StgTVar *)p);
1378 PUSH_FIELD(tvar, current_value);
1379 PUSH_FIELD(tvar, first_watch_queue_entry);
1380 break;
1381 }
1382
1383 case FUN_2_0:
1384 markQueuePushFunSrt(queue, info);
1385 PUSH_FIELD(p, payload[1]);
1386 PUSH_FIELD(p, payload[0]);
1387 break;
1388
1389 case THUNK_2_0: {
1390 StgThunk *thunk = (StgThunk *) p;
1391 markQueuePushThunkSrt(queue, info);
1392 PUSH_FIELD(thunk, payload[1]);
1393 PUSH_FIELD(thunk, payload[0]);
1394 break;
1395 }
1396
1397 case CONSTR_2_0:
1398 PUSH_FIELD(p, payload[1]);
1399 PUSH_FIELD(p, payload[0]);
1400 break;
1401
1402 case THUNK_1_0:
1403 markQueuePushThunkSrt(queue, info);
1404 PUSH_FIELD((StgThunk *) p, payload[0]);
1405 break;
1406
1407 case FUN_1_0:
1408 markQueuePushFunSrt(queue, info);
1409 PUSH_FIELD(p, payload[0]);
1410 break;
1411
1412 case CONSTR_1_0:
1413 PUSH_FIELD(p, payload[0]);
1414 break;
1415
1416 case THUNK_0_1:
1417 markQueuePushThunkSrt(queue, info);
1418 break;
1419
1420 case FUN_0_1:
1421 markQueuePushFunSrt(queue, info);
1422 break;
1423
1424 case CONSTR_0_1:
1425 case CONSTR_0_2:
1426 break;
1427
1428 case THUNK_0_2:
1429 markQueuePushThunkSrt(queue, info);
1430 break;
1431
1432 case FUN_0_2:
1433 markQueuePushFunSrt(queue, info);
1434 break;
1435
1436 case THUNK_1_1:
1437 markQueuePushThunkSrt(queue, info);
1438 PUSH_FIELD((StgThunk *) p, payload[0]);
1439 break;
1440
1441 case FUN_1_1:
1442 markQueuePushFunSrt(queue, info);
1443 PUSH_FIELD(p, payload[0]);
1444 break;
1445
1446 case CONSTR_1_1:
1447 PUSH_FIELD(p, payload[0]);
1448 break;
1449
1450 case FUN:
1451 markQueuePushFunSrt(queue, info);
1452 goto gen_obj;
1453
1454 case THUNK: {
1455 markQueuePushThunkSrt(queue, info);
1456 for (StgWord i = 0; i < info->layout.payload.ptrs; i++) {
1457 StgClosure **field = &((StgThunk *) p)->payload[i];
1458 markQueuePushClosure(queue, *field, field);
1459 }
1460 break;
1461 }
1462
1463 gen_obj:
1464 case CONSTR:
1465 case CONSTR_NOCAF:
1466 case WEAK:
1467 case PRIM:
1468 {
1469 for (StgWord i = 0; i < info->layout.payload.ptrs; i++) {
1470 StgClosure **field = &((StgClosure *) p)->payload[i];
1471 markQueuePushClosure(queue, *field, field);
1472 }
1473 break;
1474 }
1475
1476 case BCO: {
1477 StgBCO *bco = (StgBCO *)p;
1478 PUSH_FIELD(bco, instrs);
1479 PUSH_FIELD(bco, literals);
1480 PUSH_FIELD(bco, ptrs);
1481 break;
1482 }
1483
1484
1485 case IND: {
1486 PUSH_FIELD((StgInd *) p, indirectee);
1487 if (origin != NULL) {
1488 p_next = ((StgInd*)p)->indirectee;
1489 }
1490 break;
1491 }
1492
1493 case BLACKHOLE: {
1494 PUSH_FIELD((StgInd *) p, indirectee);
1495 StgClosure *indirectee = ((StgInd*)p)->indirectee;
1496 if (GET_CLOSURE_TAG(indirectee) == 0 || origin == NULL) {
1497 // do nothing
1498 } else {
1499 p_next = indirectee;
1500 }
1501 break;
1502 }
1503
1504 case MUT_VAR_CLEAN:
1505 case MUT_VAR_DIRTY:
1506 PUSH_FIELD((StgMutVar *)p, var);
1507 break;
1508
1509 case BLOCKING_QUEUE: {
1510 StgBlockingQueue *bq = (StgBlockingQueue *)p;
1511 PUSH_FIELD(bq, bh);
1512 PUSH_FIELD(bq, owner);
1513 PUSH_FIELD(bq, queue);
1514 PUSH_FIELD(bq, link);
1515 break;
1516 }
1517
1518 case THUNK_SELECTOR:
1519 if (RtsFlags.GcFlags.nonmovingSelectorOpt) {
1520 nonmoving_eval_thunk_selector(queue, (StgSelector*)p, origin);
1521 } else {
1522 PUSH_FIELD((StgSelector *) p, selectee);
1523 }
1524 break;
1525
1526 case AP_STACK: {
1527 StgAP_STACK *ap = (StgAP_STACK *)p;
1528 PUSH_FIELD(ap, fun);
1529 mark_stack_(queue, (StgPtr) ap->payload, (StgPtr) ap->payload + ap->size);
1530 break;
1531 }
1532
1533 case PAP: {
1534 StgPAP *pap = (StgPAP *) p;
1535 PUSH_FIELD(pap, fun);
1536 mark_PAP_payload(queue, pap->fun, pap->payload, pap->n_args);
1537 break;
1538 }
1539
1540 case AP: {
1541 StgAP *ap = (StgAP *) p;
1542 PUSH_FIELD(ap, fun);
1543 mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
1544 break;
1545 }
1546
1547 case ARR_WORDS:
1548 // nothing to follow
1549 break;
1550
1551 case MUT_ARR_PTRS_CLEAN:
1552 case MUT_ARR_PTRS_DIRTY:
1553 case MUT_ARR_PTRS_FROZEN_CLEAN:
1554 case MUT_ARR_PTRS_FROZEN_DIRTY:
1555 // TODO: Check this against Scav.c
1556 markQueuePushArray(queue, (StgMutArrPtrs *) p, 0);
1557 break;
1558
1559 case SMALL_MUT_ARR_PTRS_CLEAN:
1560 case SMALL_MUT_ARR_PTRS_DIRTY:
1561 case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
1562 case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: {
1563 StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs *) p;
1564 for (StgWord i = 0; i < arr->ptrs; i++) {
1565 StgClosure **field = &arr->payload[i];
1566 markQueuePushClosure(queue, *field, field);
1567 }
1568 break;
1569 }
1570
1571 case TSO:
1572 mark_tso(queue, (StgTSO *) p);
1573 break;
1574
1575 case STACK: {
1576 // See Note [StgStack dirtiness flags and concurrent marking]
1577 StgStack *stack = (StgStack *) p;
1578 StgWord8 marking = stack->marking;
1579
1580 // N.B. stack->marking must be != nonmovingMarkEpoch unless
1581 // someone has already marked it.
1582 if (cas_word8(&stack->marking, marking, nonmovingMarkEpoch)
1583 != nonmovingMarkEpoch) {
1584 // We have claimed the right to mark the stack.
1585 mark_stack(queue, stack);
1586 } else {
1587 // A mutator has already started marking the stack; we just let it
1588 // do its thing and move on. There's no reason to wait; we know that
1589 // the stack will be fully marked before we sweep due to the final
1590 // post-mark synchronization. Most importantly, we do not set its
1591             // mark bit; the mutator is responsible for this.
1592 goto done;
1593 }
1594 break;
1595 }
1596
1597 case MUT_PRIM: {
1598 for (StgHalfWord p_idx = 0; p_idx < info->layout.payload.ptrs; ++p_idx) {
1599 StgClosure **field = &p->payload[p_idx];
1600 markQueuePushClosure(queue, *field, field);
1601 }
1602 break;
1603 }
1604
1605 case TREC_CHUNK: {
1606 // TODO: Should we abort here? This should have already been marked
1607 // when we dirtied the TSO
1608 StgTRecChunk *tc = ((StgTRecChunk *) p);
1609 PUSH_FIELD(tc, prev_chunk);
1610 TRecEntry *end = &tc->entries[tc->next_entry_idx];
1611 for (TRecEntry *e = &tc->entries[0]; e < end; e++) {
1612 markQueuePushClosure_(queue, (StgClosure *) e->tvar);
1613 markQueuePushClosure_(queue, (StgClosure *) e->expected_value);
1614 markQueuePushClosure_(queue, (StgClosure *) e->new_value);
1615 }
1616 break;
1617 }
1618
1619 case WHITEHOLE:
1620 while (*(StgInfoTable* volatile*) &p->header.info == &stg_WHITEHOLE_info);
1621 goto try_again;
1622
1623 case COMPACT_NFDATA:
1624 break;
1625
1626 default:
1627 barf("mark_closure: unimplemented/strange closure type %d @ %p",
1628 info->type, p);
1629 }
1630
1631 # undef PUSH_FIELD
1632
1633 /* Set the mark bit: it's important that we do this only after we actually push
1634 * the object's pointers since in the case of marking stacks there may be a
1635 * mutator waiting for us to finish so it can start execution.
1636 */
1637 if (bd->flags & BF_LARGE) {
1638 /* Marking a large object isn't idempotent since we move it to
1639 * nonmoving_marked_large_objects; to ensure that we don't repeatedly
1640 * mark a large object, we only set BF_MARKED on large objects in the
1641 * nonmoving heap while holding nonmoving_large_objects_mutex.
1642 */
1643 ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
1644 if (! (bd->flags & BF_MARKED)) {
1645 // Remove the object from nonmoving_large_objects and link it to
1646 // nonmoving_marked_large_objects
1647 dbl_link_remove(bd, &nonmoving_large_objects);
1648 dbl_link_onto(bd, &nonmoving_marked_large_objects);
1649 n_nonmoving_large_blocks -= bd->blocks;
1650 n_nonmoving_marked_large_blocks += bd->blocks;
1651 bd->flags |= BF_MARKED;
1652 }
1653 RELEASE_LOCK(&nonmoving_large_objects_mutex);
1654 } else if (bd->flags & BF_NONMOVING) {
1655 // TODO: Kill repetition
1656 struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
1657 nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
1658 nonmovingSetMark(seg, block_idx);
1659 nonmoving_live_words += nonmovingSegmentBlockSize(seg) / sizeof(W_);
1660 }
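// Note that liveness is accounted above in whole allocator blocks: every
// closure in this segment occupies exactly one block of
// nonmovingSegmentBlockSize(seg) bytes, so that is what we add to
// nonmoving_live_words regardless of the closure's precise size.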
1661
1662 // If we found an indirection to shortcut, keep going.
1663 if (p_next) {
1664 p = p_next;
1665 goto try_again;
1666 }
1667
1668 done:
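/* If we followed one or more indirections to get here (p != p0) and the
 * closure we ended up at is guaranteed not to move (it is static or lives in
 * the nonmoving heap), try to shortcut: overwrite the field we came from
 * (origin) so that it points directly at the final closure. The cas below
 * may fail if a mutator has since written to the field; that is harmless,
 * shortcutting is only an optimization.
 */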
1669 if (origin != NULL && (!HEAP_ALLOCED(p) || bd->flags & BF_NONMOVING)) {
1670 if (UNTAG_CLOSURE((StgClosure*)p0) != p && *origin == p0) {
1671 if (cas((StgVolatilePtr)origin, (StgWord)p0, (StgWord)TAG_CLOSURE(tag, p)) == (StgWord)p0) {
1672 // debugBelch("Thunk optimization successful\n");
1673 }
1674 }
1675 }
1676 }
1677
1678 /* This is the main mark loop.
1679 * Invariants:
1680 *
1681 * a. nonmovingPrepareMark has been called.
1682 * b. the nursery has been fully evacuated into the non-moving generation.
1683 * c. the mark queue has been seeded with a set of roots.
1684 *
1685 */
1686 GNUC_ATTR_HOT void
1687 nonmovingMark (MarkQueue *queue)
1688 {
1689 traceConcMarkBegin();
1690 debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
1691 unsigned int count = 0;
1692 while (true) {
1693 count++;
1694 MarkQueueEnt ent = markQueuePop(queue);
1695
1696 switch (nonmovingMarkQueueEntryType(&ent)) {
1697 case MARK_CLOSURE:
1698 mark_closure(queue, ent.mark_closure.p, ent.mark_closure.origin);
1699 break;
1700 case MARK_ARRAY: {
1701 const StgMutArrPtrs *arr = (const StgMutArrPtrs *)
1702 UNTAG_CLOSURE((StgClosure *) ent.mark_array.array);
1703 StgWord start = ent.mark_array.start_index;
1704 StgWord end = start + MARK_ARRAY_CHUNK_LENGTH;
1705 if (end < arr->ptrs) {
1706 // There is more to be marked after this chunk.
1707 markQueuePushArray(queue, arr, end);
1708 } else {
1709 end = arr->ptrs;
1710 }
1711 for (StgWord i = start; i < end; i++) {
1712 markQueuePushClosure_(queue, arr->payload[i]);
1713 }
1714 break;
1715 }
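/* Worked example of the chunking above: for an array of 300 pointers and
 * MARK_ARRAY_CHUNK_LENGTH = 128, the array is processed as three MARK_ARRAY
 * entries covering indices [0,128), [128,256) and [256,300); the first two
 * re-push an entry for the remainder before marking their chunk, the last
 * does not.
 */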
1716 case NULL_ENTRY:
1717 // Perhaps the update remembered set has more to mark...
1718 if (upd_rem_set_block_list) {
1719 ACQUIRE_LOCK(&upd_rem_set_lock);
1720 bdescr *old = queue->blocks;
1721 queue->blocks = upd_rem_set_block_list;
1722 queue->top = (MarkQueueBlock *) queue->blocks->start;
1723 upd_rem_set_block_list = NULL;
1724 RELEASE_LOCK(&upd_rem_set_lock);
1725
1726 ACQUIRE_SM_LOCK;
1727 freeGroup(old);
1728 RELEASE_SM_LOCK;
1729 } else {
1730 // Nothing more to do
1731 debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count);
1732 traceConcMarkEnd(count);
1733 return;
1734 }
1735 }
1736 }
1737 }
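/* A rough sketch of how the collector is expected to drive this loop (the
 * real driver lives in NonMoving.c and its exact sequencing may differ; the
 * locals below are purely illustrative):
 *
 *     nonmovingMark(queue);                    // drain the root set
 *     for (;;) {
 *         nonmovingTidyThreads();
 *         if (!nonmovingTidyWeaks(queue)) break;
 *         nonmovingMark(queue);                // weaks revealed new work
 *     }
 *     nonmovingMarkDeadWeaks(queue, &dead_weaks);
 *     nonmovingResurrectThreads(queue, &resurrected_threads);
 *     nonmovingMark(queue);                    // flush what the above pushed
 */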
1738
1739 // A variant of `isAlive` that works for the non-moving heap. Used for:
1740 //
1741 // - Collecting weak pointers; checking key of a weak pointer.
1742 // - Resurrecting threads; checking if a thread is dead.
1743 // - Sweeping object lists: large_objects, mut_list, stable_name_table.
1744 //
1745 // This may only be used after a full mark but before nonmovingSweep as it
1746 // relies on the correctness of the next_free_snap and mark bitmaps.
1747 bool nonmovingIsAlive (StgClosure *p)
1748 {
1749 // Ignore static closures. See comments in `isAlive`.
1750 if (!HEAP_ALLOCED_GC(p)) {
1751 return true;
1752 }
1753
1754 bdescr *bd = Bdescr((P_)p);
1755
1756 // All non-static objects in the non-moving heap should be marked as
1757 // BF_NONMOVING
1758 ASSERT(bd->flags & BF_NONMOVING);
1759
1760 if (bd->flags & (BF_COMPACT | BF_LARGE)) {
1761 if (bd->flags & BF_COMPACT) {
1762 StgCompactNFData *str = objectGetCompact((StgClosure*)p);
1763 bd = Bdescr((P_)str);
1764 }
1765 return (bd->flags & BF_NONMOVING_SWEEPING) == 0
1766 // the large object wasn't in the snapshot and therefore wasn't marked
1767 || (bd->flags & BF_MARKED) != 0;
1768 // The object was marked
1769 } else {
1770 struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
1771 nonmoving_block_idx i = nonmovingGetBlockIdx((StgPtr) p);
1772 uint8_t mark = nonmovingGetMark(seg, i);
1773 if (i >= nonmovingSegmentInfo(seg)->next_free_snap) {
1774 // If the object is allocated after next_free_snap then one of the
1775 // following must be true:
1776 //
1777 // * if its mark is 0 then the block was not allocated last time
1778 // the segment was swept; however, it may have been allocated since
1779 // then and therefore we must conclude that the block is alive.
1780 //
1781 // * if its mark is equal to nonmovingMarkEpoch then we found that
1782 // the object was alive in the snapshot of the current GC (recall
1783 // that this function may only be used after a mark).
1784 // Consequently we must conclude that the object is still alive.
1785 //
1786 // * if its mark is not equal to nonmovingMarkEpoch then we found
1787 // that the object was not reachable in the last snapshot.
1788 // Assuming that the mark is complete we can conclude that the
1789 // object is dead since the snapshot invariant guarantees that
1790 // all objects alive in the snapshot would be marked.
1791 //
1792 return mark == nonmovingMarkEpoch || mark == 0;
1793 } else {
1794 // If the object is below next_free_snap then the snapshot
1795 // invariant guarantees that it is marked if reachable.
1796 return mark == nonmovingMarkEpoch;
1797 }
1798 }
1799 }
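/* Summary of the non-large case above, for an object at block index i of its
 * segment (writing "epoch" for nonmovingMarkEpoch and "snap" for
 * next_free_snap):
 *
 *     i <  snap, mark == epoch  ->  alive (marked this cycle)
 *     i <  snap, mark != epoch  ->  dead  (in the snapshot but unmarked)
 *     i >= snap, mark == 0      ->  alive (allocated since the last sweep)
 *     i >= snap, mark == epoch  ->  alive (marked this cycle)
 *     i >= snap, stale mark     ->  dead  (unreachable in the snapshot)
 */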
1800
1801 // Check whether a snapshotted object is alive. That is, for an object that we
1802 // know to be in the snapshot: is its mark bit set? It is imperative that the
1803 // object is in the snapshot (i.e. it was in the nonmoving heap at the time
1804 // the snapshot was taken) since we assume that its mark bit reflects its
1805 // reachability.
1806 //
1807 // This is used when
1808 //
1809 // - Collecting weak pointers; checking key of a weak pointer.
1810 // - Resurrecting threads; checking if a thread is dead.
1811 // - Sweeping object lists: large_objects, mut_list, stable_name_table.
1812 //
1813 static bool nonmovingIsNowAlive (StgClosure *p)
1814 {
1815 // Ignore static closures. See comments in `isAlive`.
1816 if (!HEAP_ALLOCED_GC(p)) {
1817 return true;
1818 }
1819
1820 bdescr *bd = Bdescr((P_)p);
1821
1822 // All non-static objects in the non-moving heap should be marked as
1823 // BF_NONMOVING
1824 ASSERT(bd->flags & BF_NONMOVING);
1825
1826 if (bd->flags & BF_LARGE) {
1827 return (bd->flags & BF_NONMOVING_SWEEPING) == 0
1828 // the large object wasn't in the snapshot and therefore wasn't marked
1829 || (bd->flags & BF_MARKED) != 0;
1830 // The object was marked
1831 } else {
1832 return nonmovingClosureMarkedThisCycle((P_)p);
1833 }
1834 }
1835
1836 // Non-moving heap variant of `tidyWeakList`
1837 bool nonmovingTidyWeaks (struct MarkQueue_ *queue)
1838 {
1839 bool did_work = false;
1840
1841 StgWeak **last_w = &nonmoving_old_weak_ptr_list;
1842 StgWeak *next_w;
1843 for (StgWeak *w = nonmoving_old_weak_ptr_list; w != NULL; w = next_w) {
1844 if (w->header.info == &stg_DEAD_WEAK_info) {
1845 // finalizeWeak# was called on the weak
1846 next_w = w->link;
1847 *last_w = next_w;
1848 continue;
1849 }
1850
1851 // Otherwise it's a live weak
1852 ASSERT(w->header.info == &stg_WEAK_info);
1853
1854 if (nonmovingIsNowAlive(w->key)) {
1855 nonmovingMarkLiveWeak(queue, w);
1856 did_work = true;
1857
1858 // remove this weak ptr from old_weak_ptr list
1859 *last_w = w->link;
1860 next_w = w->link;
1861
1862 // and put it on the weak ptr list
1863 w->link = nonmoving_weak_ptr_list;
1864 nonmoving_weak_ptr_list = w;
1865 } else {
1866 last_w = &(w->link);
1867 next_w = w->link;
1868 }
1869 }
1870
1871 return did_work;
1872 }
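/* The did_work result matters: marking the value and finalizer of a
 * newly-live weak may make the keys of other weaks reachable, so the caller
 * must alternate nonmovingMark and nonmovingTidyWeaks until this returns
 * false (see the driver sketch following nonmovingMark above).
 */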
1873
1874 void nonmovingMarkDeadWeak (struct MarkQueue_ *queue, StgWeak *w)
1875 {
1876 if (w->cfinalizers != &stg_NO_FINALIZER_closure) {
1877 markQueuePushClosure_(queue, w->value);
1878 }
1879 markQueuePushClosure_(queue, w->finalizer);
1880 }
1881
1882 void nonmovingMarkLiveWeak (struct MarkQueue_ *queue, StgWeak *w)
1883 {
1884 ASSERT(nonmovingClosureMarkedThisCycle((P_)w));
1885 markQueuePushClosure_(queue, w->value);
1886 markQueuePushClosure_(queue, w->finalizer);
1887 markQueuePushClosure_(queue, w->cfinalizers);
1888 }
1889
1890 // When we're done with marking, any weak pointers with non-marked keys will be
1891 // considered "dead". We mark values and finalizers of such weaks, and then
1892 // schedule them for finalization in `scheduleFinalizers` (which we run during
1893 // synchronization).
1894 void nonmovingMarkDeadWeaks (struct MarkQueue_ *queue, StgWeak **dead_weaks)
1895 {
1896 StgWeak *next_w;
1897 for (StgWeak *w = nonmoving_old_weak_ptr_list; w; w = next_w) {
1898 ASSERT(!nonmovingClosureMarkedThisCycle((P_)(w->key)));
1899 nonmovingMarkDeadWeak(queue, w);
1900 next_w = w->link;
1901 w->link = *dead_weaks;
1902 *dead_weaks = w;
1903 }
1904 }
1905
1906 // Non-moving heap variant of `tidyThreadList`
1907 void nonmovingTidyThreads ()
1908 {
1909 StgTSO *next;
1910 StgTSO **prev = &nonmoving_old_threads;
1911 for (StgTSO *t = nonmoving_old_threads; t != END_TSO_QUEUE; t = next) {
1912
1913 next = t->global_link;
1914
1915 // N.B. This thread is in old_threads, consequently we *know* it is in
1916 // the snapshot and it is therefore safe to rely on the bitmap to
1917 // determine its reachability.
1918 if (nonmovingIsNowAlive((StgClosure*)t)) {
1919 // alive
1920 *prev = next;
1921
1922 // move this thread onto threads list
1923 t->global_link = nonmoving_threads;
1924 nonmoving_threads = t;
1925 } else {
1926 // not alive (yet): leave this thread on the old_threads list
1927 prev = &(t->global_link);
1928 }
1929 }
1930 }
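// Threads left on nonmoving_old_threads once marking and weak tidying have
// reached a fixed point were not reachable from the snapshot's roots; they
// are handed to nonmovingResurrectThreads below.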
1931
1932 // Mark threads which appear to be dead but still need to be properly torn down
1933 // by resurrectThreads.
1934 void nonmovingResurrectThreads (struct MarkQueue_ *queue, StgTSO **resurrected_threads)
1935 {
1936 StgTSO *next;
1937 for (StgTSO *t = nonmoving_old_threads; t != END_TSO_QUEUE; t = next) {
1938 next = t->global_link;
1939
1940 switch (t->what_next) {
1941 case ThreadKilled:
1942 case ThreadComplete:
1943 continue;
1944 default:
1945 // The thread may be, e.g., deadlocked, in which case we must ensure
1946 // it isn't swept, since resurrectThreads will need to throw it an
1947 // exception.
1948 markQueuePushClosure_(queue, (StgClosure*)t);
1949 t->global_link = *resurrected_threads;
1950 *resurrected_threads = t;
1951 }
1952 }
1953 }
1954
1955 #if defined(DEBUG)
1956
1957 void printMarkQueueEntry (MarkQueueEnt *ent)
1958 {
1959 switch(nonmovingMarkQueueEntryType(ent)) {
1960 case MARK_CLOSURE:
1961 debugBelch("Closure: ");
1962 printClosure(ent->mark_closure.p);
1963 break;
1964 case MARK_ARRAY:
1965 debugBelch("Array\n");
1966 break;
1967 case NULL_ENTRY:
1968 debugBelch("End of mark\n");
1969 break;
1970 }
1971 }
1972
1973 void printMarkQueue (MarkQueue *q)
1974 {
1975 debugBelch("======== MARK QUEUE ========\n");
1976 for (bdescr *block = q->blocks; block; block = block->link) {
1977 MarkQueueBlock *queue = (MarkQueueBlock*)block->start;
1978 for (uint32_t i = 0; i < queue->head; ++i) {
1979 printMarkQueueEntry(&queue->entries[i]);
1980 }
1981 }
1982 debugBelch("===== END OF MARK QUEUE ====\n");
1983 }
1984
1985 #endif
1986