1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2018
4 *
5 * Non-moving garbage collector and allocator
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "Rts.h"
10 #include "RtsUtils.h"
11 #include "Capability.h"
12 #include "Sparks.h"
13 #include "Printer.h"
14 #include "Storage.h"
15 // We call evacuate, which expects the thread-local gc_thread to be valid;
16 // this is sometimes declared as a register variable, so it is necessary
17 // to include the declaration so that the compiler doesn't clobber the register.
18 #include "GCThread.h"
19 #include "GCTDecl.h"
20 #include "Schedule.h"
21 #include "Stats.h"
22
23 #include "NonMoving.h"
24 #include "NonMovingMark.h"
25 #include "NonMovingSweep.h"
26 #include "NonMovingCensus.h"
27 #include "StablePtr.h" // markStablePtrTable
28 #include "Schedule.h" // markScheduler
29 #include "Weak.h" // dead_weak_ptr_list
30
31 struct NonmovingHeap nonmovingHeap;
32
33 uint8_t nonmovingMarkEpoch = 1;
34
35 static void nonmovingBumpEpoch(void) {
36 nonmovingMarkEpoch = nonmovingMarkEpoch == 1 ? 2 : 1;
37 }
38
39 #if defined(THREADED_RTS)
40 /*
41 * This mutex ensures that only one non-moving collection is active at a time.
42 */
43 Mutex nonmoving_collection_mutex;
44
45 OSThreadId mark_thread;
46 bool concurrent_coll_running = false;
47 Condition concurrent_coll_finished;
48 Mutex concurrent_coll_finished_lock;
49 #endif
50
51 /*
52 * Note [Non-moving garbage collector]
53 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
54 * The sources rts/NonMoving*.c implement GHC's non-moving garbage collector
55 * for the oldest generation. In contrast to the throughput-oriented moving
56 * collector, the non-moving collector is designed to achieve low GC latencies
57 * on large heaps. It accomplishes low latencies by way of a concurrent
58 * mark-and-sweep collection strategy on a specially-designed heap structure.
59 * While the design is described in detail in the design document found in
60 * docs/storage/nonmoving-gc, we briefly summarize the structure here.
61 *
62 *
63 * === Heap Structure ===
64 *
65 * The nonmoving heap (embodied by struct NonmovingHeap) consists of a family
66 * of allocators, each serving a range of allocation sizes. Each allocator
67 * consists of a set of *segments*, each of which contains fixed-size *blocks*
68 * (not to be confused with "blocks" provided by GHC's block allocator; this is
69 * admittedly an unfortunate overlap in terminology). These blocks are the
70 * backing store for the allocator. In addition to blocks, the segment also
71 * contains some header information (see struct NonmovingSegment in
72 * NonMoving.h). This header contains a *bitmap* encoding one byte per block
73 * (used by the collector to record liveness), as well as the index of the next
74 * unallocated block (and a *snapshot* of this field which will be described in
75 * the next section).
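 *
 * Roughly, a segment header looks like the following sketch. The
 * authoritative definition lives in NonMoving.h; this is illustrative only,
 * and note that in this version of the code the block size and the snapshot
 * of next_free live in the segment's block descriptor (see
 * nonmovingInitSegment below):
 *
 *     struct NonmovingSegment {
 *         struct NonmovingSegment *link;      // link field used by the allocator's lists
 *         struct NonmovingSegment *todo_link; // link field used by the mark queue
 *         nonmoving_block_idx next_free;      // index of the next unallocated block
 *         uint8_t bitmap[];                   // one mark byte per block
 *     };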
76 *
77 * Each allocator maintains three sets of segments:
78 *
79 * - A *current* segment for each capability; this is the segment which that
80 * capability will allocate into.
81 *
82 *  - A pool of *active* segments, each of which contains at least one
83 *    unallocated block. The allocator will take a segment from this pool when
84 *    it fills its *current* segment.
85 *
86 * - A set of *filled* segments, which contain no unallocated blocks and will
87 * be collected during the next major GC cycle
88 *
89 * Storage for segments is allocated from the block allocator as an aligned
90 * group of NONMOVING_SEGMENT_BLOCKS blocks. This makes the task of locating
91 * the segment header for a closure a simple matter of bit-masking (as
92 * implemented by nonmovingGetSegment).
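 *
 * Since segments are allocated with NONMOVING_SEGMENT_SIZE alignment (see the
 * assertion in nonmovingAllocSegment), finding the segment of a closure p is
 * roughly (a sketch; the real helper is nonmovingGetSegment in NonMoving.h):
 *
 *     struct NonmovingSegment *seg =
 *         (struct NonmovingSegment *) ((uintptr_t) p & ~(NONMOVING_SEGMENT_SIZE - 1));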
93 *
94 * In addition, to relieve pressure on the block allocator we keep a small pool
95 * of free segments around (nonmovingHeap.free) which can be pushed/popped
96 * to/from in a lock-free manner.
97 *
98 *
99 * === Allocation ===
100 *
101 * The allocator (as implemented by nonmovingAllocate) starts by identifying
102 * which allocator the request should be made against. It then allocates into
103 * its local current segment and bumps the next_free pointer to point to the
104 * next unallocated block (as indicated by the bitmap). If it finds the current
105 * segment is now full it moves it to the filled list and looks for a new
106 * segment to make current from a few sources:
107 *
108 * 1. the allocator's active list (see pop_active_segment)
109 *  2. the nonmoving heap's free segment pool (see nonmovingPopFreeSegment)
110 * 3. allocate a new segment from the block allocator (see
111 * nonmovingAllocSegment)
112 *
113 * Note that allocation does *not* involve modifying the bitmap. The bitmap is
114 * only modified by the collector.
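 *
 * In code, the heart of the allocation path looks roughly like the following
 * sketch (see nonmovingAllocate below for the real version):
 *
 *     struct NonmovingAllocator *alloca =
 *         nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
 *     struct NonmovingSegment *current = alloca->current[cap->no];
 *     void *ret = nonmovingSegmentGetBlock_(current, log_block_size,
 *                                           current->next_free);
 *     if (advance_next_free(current, block_count)) {
 *         // current segment is now full: push it to the filled list and
 *         // install a new current segment from sources 1-3 above
 *     }
 *     return ret;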
115 *
116 *
117 * === Snapshot invariant ===
118 *
119 * To safely collect in a concurrent setting, the collector relies on the
120 * notion of a *snapshot*. The snapshot is a hypothetical frozen state of the
121 * heap topology taken at the beginning of the major collection cycle.
122 * With this definition we require the following property of the mark phase,
123 * which we call the *snapshot invariant*,
124 *
125 *   All objects that were reachable at the time the snapshot was taken
126 * must have their mark bits set at the end of the mark phase.
127 *
128 * As the mutator might change the topology of the heap while we are marking,
129 * maintaining this property requires some cooperation from the mutator.
130 * Specifically, we rely on a write barrier as described in Note [Update
131 * remembered set].
132 *
133 * To determine which objects existed when the snapshot was taken we
134 * record a snapshot of each segment's next_free pointer at the beginning of
135 * collection.
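 *
 * Concretely, a block with index blk_idx in segment seg is considered part of
 * the snapshot iff
 *
 *     blk_idx < nonmovingSegmentInfo(seg)->next_free_snap
 *
 * which is precisely the check performed in step (3) of the marking procedure
 * described below.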
136 *
137 *
138 * === Collection ===
139 *
140 * Collection happens in a few phases, some of which occur during a
141 * stop-the-world period (marked with [STW]) and others which can occur
142 * concurrently with mutation and minor collection (marked with [CONC]):
143 *
144 * 1. [STW] Preparatory GC: Here we do a standard minor collection of the
145 * younger generations (which may evacuate things to the nonmoving heap).
146 * References from younger generations into the nonmoving heap are recorded
147 * in the mark queue (see Note [Aging under the non-moving collector] in
148 * this file).
149 *
150 * 2. [STW] Snapshot update: Here we update the segment snapshot metadata
151 * (see nonmovingPrepareMark) and move the filled segments to
152 * nonmovingHeap.sweep_list, which is the set of segments which we will
153 * sweep this GC cycle.
154 *
155 * 3. [STW] Root collection: Here we walk over a variety of root sources
156 * and add them to the mark queue (see nonmovingCollect).
157 *
158 * 4. [CONC] Concurrent marking: Here we do the majority of marking concurrently
159 * with mutator execution (but with the write barrier enabled; see
160 * Note [Update remembered set]).
161 *
162 * 5. [STW] Final sync: Here we interrupt the mutators, ask them to
163 * flush their final update remembered sets, and mark any new references
164 * we find.
165 *
166 * 6. [CONC] Sweep: Here we walk over the nonmoving segments on sweep_list
167 * and place them back on either the active, current, or filled list,
168 * depending upon how much live data they contain.
169 *
170 *
171 * === Marking ===
172 *
173 * Ignoring large and static objects, marking a closure is fairly
174 * straightforward (implemented in NonMovingMark.c:mark_closure):
175 *
176 * 1. Check whether the closure is in the non-moving generation; if not then
177 * we ignore it.
178 * 2. Find the segment containing the closure's block.
179 *  3. Check whether the closure's block is above seg->next_free_snap; if so
180 *     then the block was not allocated when we took the snapshot and therefore
181 *     we don't need to mark it.
182 *  4. Check whether the block's bitmap byte is equal to nonmovingMarkEpoch. If
183 *     so then we can stop as we have already marked it.
184 *  5. Push the closure's pointers to the mark queue.
185 *  6. Set the block's bitmap byte to nonmovingMarkEpoch.
186 *
187 * Note that the ordering of (5) and (6) is rather important, as described in
188 * Note [StgStack dirtiness flags and concurrent marking].
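 *
 * A sketch of these steps in code (the real logic lives in
 * NonMovingMark.c:mark_closure and handles many more cases; helper names here
 * are approximate, and push_fields stands in for pushing each pointer field
 * with markQueuePushClosure):
 *
 *     bdescr *bd = Bdescr((StgPtr) p);
 *     if (!(bd->flags & BF_NONMOVING)) return;                        // (1)
 *     struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p); // (2)
 *     nonmoving_block_idx idx = nonmovingGetBlockIdx((StgPtr) p);
 *     if (idx >= nonmovingSegmentInfo(seg)->next_free_snap) return;   // (3)
 *     if (nonmovingGetMark(seg, idx) == nonmovingMarkEpoch) return;   // (4)
 *     push_fields(queue, p);                                          // (5)
 *     nonmovingSetMark(seg, idx);                                     // (6)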
189 *
190 *
191 * === Other references ===
192 *
193 * Apart from the design document in docs/storage/nonmoving-gc and the Ueno
194 * 2016 paper [ueno 2016] from which it drew inspiration, there are a variety
195 * of other relevant Notes scattered throughout the tree:
196 *
197 * - Note [Concurrent non-moving collection] (NonMoving.c) describes
198 * concurrency control of the nonmoving collector
199 *
200 * - Note [Live data accounting in nonmoving collector] (NonMoving.c)
201 * describes how we track the quantity of live data in the nonmoving
202 * generation.
203 *
204 * - Note [Aging under the non-moving collector] (NonMoving.c) describes how
205 *   we accommodate aging.
206 *
207 * - Note [Non-moving GC: Marking evacuated objects] (Evac.c) describes how
208 * non-moving objects reached by evacuate() are marked, which is necessary
209 * due to aging.
210 *
211 * - Note [Large objects in the non-moving collector] (NonMovingMark.c)
212 * describes how we track large objects.
213 *
214 * - Note [Update remembered set] (NonMovingMark.c) describes the function and
215 * implementation of the update remembered set used to realize the concurrent
216 * write barrier.
217 *
218 * - Note [Concurrent read barrier on deRefWeak#] (NonMovingMark.c) describes
219 * the read barrier on Weak# objects.
220 *
221 * - Note [Unintentional marking in resurrectThreads] (NonMovingMark.c) describes
222 * a tricky interaction between the update remembered set flush and weak
223 * finalization.
224 *
225 * - Note [Origin references in the nonmoving collector] (NonMovingMark.h)
226 * describes how we implement indirection short-cutting and the selector
227 * optimisation.
228 *
229 * - Note [StgStack dirtiness flags and concurrent marking] (TSO.h) describes
230 * the protocol for concurrent marking of stacks.
231 *
232 * - Note [Static objects under the nonmoving collector] (Storage.c) describes
233 * treatment of static objects.
234 *
235 * - Note [Dirty flags in the non-moving collector] (NonMoving.c) describes
236 * how we use the DIRTY flags associated with MUT_VARs and TVARs to improve
237 * barrier efficiency.
238 *
239 * [ueno 2016]:
240 * Katsuhiro Ueno and Atsushi Ohori. 2016. A fully concurrent garbage
241 * collector for functional programs on multicore processors. SIGPLAN Not. 51,
242 * 9 (September 2016), 421–433. DOI:https://doi.org/10.1145/3022670.2951944
243 *
244 *
245 * Note [Concurrent non-moving collection]
246 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
247 * Concurrency-control of non-moving garbage collection is a bit tricky. There
248 * are a few things to keep in mind:
249 *
250 * - Only one non-moving collection may be active at a time. This is enforced by the
251 * concurrent_coll_running flag, which is set when a collection is on-going. If
252 * we attempt to initiate a new collection while this is set we wait on the
253 * concurrent_coll_finished condition variable, which signals when the
254 * active collection finishes.
255 *
256 * - In between the mark and sweep phases the non-moving collector must synchronize
257 * with mutator threads to collect and mark their final update remembered
258 * sets. This is accomplished using
259 * stopAllCapabilitiesWith(SYNC_FLUSH_UPD_REM_SET). Capabilities are held
260 *   until the final mark has concluded.
261 *
262 * Note that the possibility of concurrent minor and non-moving collections
263 * requires that we handle static objects a bit specially. See
264 * Note [Static objects under the nonmoving collector] in Storage.c
265 * for details.
266 *
267 *
268 * Note [Aging under the non-moving collector]
269 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
270 *
271 * The initial design of the non-moving collector mandated that all live data
272 * be evacuated to the non-moving heap prior to a major collection. This
273 * simplified certain bits of implementation and eased reasoning. However, it
274 * was (unsurprisingly) also found to result in significant amounts of
275 * unnecessary copying.
276 *
277 * Consequently, we now allow aging. Aging allows the preparatory GC leading up
278 * to a major collection to evacuate some objects into the young generation.
279 * However, this introduces the following tricky case that might arise after
280 * we have finished the preparatory GC:
281 *
282 * moving heap ┆ non-moving heap
283 * ───────────────┆──────────────────
284 * ┆
285 * B ←────────────── A ←─────────────── root
286 * │ ┆ ↖─────────────── gen1 mut_list
287 * ╰───────────────→ C
288 * ┆
289 *
290 * In this case C is clearly live, but the non-moving collector can only see
291 * this by walking through B, which lives in the moving heap. However, doing so
292 * would require that we synchronize with the mutator/minor GC to ensure that it
293 * isn't in the middle of moving B. What to do?
294 *
295 * The solution we use here is to teach the preparatory moving collector to
296 * "evacuate" objects it encounters in the non-moving heap by adding them to
297 * the mark queue. This is implemented by pushing the object to the update
298 * remembered set of the capability held by the evacuating gc_thread
299 * (implemented by markQueuePushClosureGC).
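 *
 * In Evac.c this amounts to something like the following sketch (see
 * Note [Non-moving GC: Marking evacuated objects] in Evac.c for the real
 * code):
 *
 *     if (bd->flags & BF_NONMOVING) {
 *         // the object already lives in the non-moving heap; don't copy it,
 *         // just make sure the concurrent mark will visit it
 *         if (major_gc && !deadlock_detect_gc)
 *             markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, p);
 *         return;
 *     }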
300 *
301 * Consequently, collection of the case above would proceed as follows:
302 *
303 * 1. Initial state:
304 * * A lives in the non-moving heap and is reachable from the root set
305 * * A is on the oldest generation's mut_list, since it contains a pointer
306 * to B, which lives in a younger generation
307 * * B lives in the moving collector's from space
308 * * C lives in the non-moving heap
309 *
310 * 2. Preparatory GC: Scavenging mut_lists:
311 *
312 * The mut_list of the oldest generation is scavenged, resulting in B being
313 * evacuated (aged) into the moving collector's to-space.
314 *
315 * 3. Preparatory GC: Scavenge B
316 *
317 * B (now in to-space) is scavenged, resulting in evacuation of C.
318 * evacuate(C) pushes a reference to C to the mark queue.
319 *
320 * 4. Non-moving GC: C is marked
321 *
322 * The non-moving collector will come to C in the mark queue and mark it.
323 *
324 * The implementation details of this are described in Note [Non-moving GC:
325 * Marking evacuated objects] in Evac.c.
326 *
327 * Note [Deadlock detection under the non-moving collector]
328 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
329 * In GHC the garbage collector is responsible for identifying deadlocked
330 * programs. Providing for this responsibility is slightly tricky in the
331 * non-moving collector due to the existence of aging. In particular, the
332 * non-moving collector cannot traverse objects living in a young generation
333 * but reachable from the non-moving generation, as described in Note [Aging
334 * under the non-moving collector].
335 *
336 * However, this can pose trouble for deadlock detection since it means that we
337 * may conservatively mark dead closures as live. Consider this case:
338 *
339 * moving heap ┆ non-moving heap
340 * ───────────────┆──────────────────
341 * ┆
342 * MVAR_QUEUE ←───── TSO ←───────────── gen1 mut_list
343 * ↑ │ ╰────────↗ │
344 * │ │ ┆ │
345 * │ │ ┆ ↓
346 * │ ╰──────────→ MVAR
347 * ╰─────────────────╯
348 * ┆
349 *
350 * In this case we have a TSO blocked on a dead MVar. Because the MVAR_TSO_QUEUE on
351 * which it is blocked lives in the moving heap, the TSO is necessarily on the
352 * oldest generation's mut_list. As in Note [Aging under the non-moving
353 * collector], the MVAR_TSO_QUEUE will be evacuated. If MVAR_TSO_QUEUE is aged
354 * (e.g. evacuated to the young generation) then the MVAR will be added to the
355 * mark queue. Consequently, we will falsely conclude that the MVAR is still
356 * alive and fail to spot the deadlock.
357 *
358 * To avoid this sort of situation we disable aging when we are starting a
359 * major GC specifically for deadlock detection (as done by
360 * scheduleDetectDeadlock). This condition is recorded by the
361 * deadlock_detect_gc global variable declared in GC.h. Setting this has a few
362 * effects on the preparatory GC:
363 *
364 *  - Evac.c:alloc_for_copy forces evacuation to the non-moving generation
 *    (sketched after this list).
365 *
366 * - The evacuation logic usually responsible for pushing objects living in
367 * the non-moving heap to the mark queue is disabled. This is safe because
368 * we know that all live objects will be in the non-moving heap by the end
369 * of the preparatory moving collection.
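 *
 * A sketch of the first effect (the real code in Evac.c:alloc_for_copy may
 * differ in detail):
 *
 *     if (deadlock_detect_gc) {
 *         // everything must end up in the non-moving generation so that the
 *         // mark phase alone can decide reachability
 *         gen_no = oldest_gen->no;
 *     }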
370 *
371 *
372 * Note [Live data accounting in nonmoving collector]
373 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
374 * The nonmoving collector uses an approximate heuristic for reporting live
375 * data quantity. Specifically, during mark we record how much live data we
376 * find in nonmoving_live_words. At the end of mark we declare this amount to
377 * be how much live data we have in the nonmoving heap (by setting
378 * oldest_gen->live_estimate).
379 *
380 * In addition, we update oldest_gen->live_estimate every time we fill a
381 * segment. This, as well, is quite approximate: we assume that all blocks
382 * above next_free_snap are newly allocated. In principle we could refer to the
383 * bitmap to count how many blocks we actually allocated but this too would be
384 * approximate due to concurrent collection and ultimately seems more costly
385 * than the problem demands.
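 *
 * As a worked example (with illustrative numbers, on a 64-bit machine):
 * filling a segment of 2^6 = 64-byte blocks with block_count = 256 and
 * next_free_snap = 100 adds
 *
 *     (256 - 100) * 64 / sizeof(W_) = 156 * 64 / 8 = 1248 words
 *
 * to oldest_gen->live_estimate; see the "segment is full" branch of
 * nonmovingAllocate below.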
386 *
387 *
388 * Note [Spark management under the nonmoving collector]
389 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
390 * Every GC, both minor and major, prunes the spark queue (using
391 * Sparks.c:pruneSparkQueue) of sparks which are no longer reachable.
392 * Doing this with concurrent collection is a tad subtle since the minor
393 * collections cannot rely on the mark bitmap to accurately reflect the
394 * reachability of a spark.
395 *
396 * We use a conservative reachability approximation:
397 *
398 * - Minor collections assume that all sparks living in the non-moving heap
399 * are reachable.
400 *
401 * - Major collections prune the spark queue during the final sync. This pruning
402 * assumes that all sparks in the young generations are reachable (since the
403 * BF_EVACUATED flag won't be set on the nursery blocks) and will consequently
404 * only prune dead sparks living in the non-moving heap.
405 *
406 *
407 * Note [Dirty flags in the non-moving collector]
408 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
409 * Some mutable object types (e.g. MUT_VARs, TVARs) have a one-bit dirty flag
410 * encoded in their info table pointer. The moving collector uses this flag
411 * to minimize redundant mut_list entries. The flag preserves the following
412 * simple invariant:
413 *
414 * An object being marked as dirty implies that the object is on mut_list.
415 *
416 * This allows a nice optimisation in the write barrier (e.g. dirty_MUT_VAR):
417 * if we write to an already-dirty object there is no need to
418 * push it to the mut_list as we know it's already there.
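 *
 * Schematically the barrier's fast path looks something like this (a
 * simplification of dirty_MUT_VAR):
 *
 *     if (mv->header.info == &stg_MUT_VAR_CLEAN_info) {
 *         mv->header.info = &stg_MUT_VAR_DIRTY_info;
 *         recordClosureMutated(cap, (StgClosure *) mv);  // add to mut_list
 *     }
 *     // else: already dirty, hence already on mut_list; nothing to do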
419 *
420 * During GC (scavenging) we will then keep track of whether all of the
421 * object's references have been promoted. If so we can mark the object as clean.
422 * If not then we re-add it to mut_list and mark it as dirty.
423 *
424 * In the non-moving collector we use the same dirty flag to implement a
425 * related optimisation on the non-moving write barrier: Specifically, the
426 * snapshot invariant only requires that the non-moving write barrier applies
427 * to the *first* mutation to an object after collection begins. To achieve this,
428 * we impose the following invariant:
429 *
430 * An object being marked as dirty implies that all of its fields are on
431 * the mark queue (or, equivalently, update remembered set).
432 *
433 * With this guarantee we can safely make the write barriers of dirty objects
434 * no-ops (see the sketch after the list below). We perform this optimisation
 * for the following object types:
435 *
436 * - MVAR
437 * - TVAR
438 * - MUT_VAR
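 *
 * Concretely, the write barrier for these types is guarded roughly as follows
 * (a sketch; the real checks live in the corresponding dirty_* functions):
 *
 *     if (nonmoving_write_barrier_enabled
 *             && mv->header.info == &stg_MUT_VAR_CLEAN_info) {
 *         // only the first mutation after the snapshot needs to push the
 *         // old value to the update remembered set
 *         updateRemembSetPushClosure(cap, old_value);
 *     }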
439 *
440 * However, maintaining this invariant requires great care. For instance,
441 * consider the case of an MVar (which has two pointer fields) before
442 * preparatory collection:
443 *
444 * Non-moving heap ┊ Moving heap
445 * gen 1 ┊ gen 0
446 * ──────────────────────┼────────────────────────────────
447 * ┊
448 * MVAR A ────────────────→ X
449 * (dirty) ───────────╮
450 * ┊ ╰────→ Y
451 * ┊ │
452 * ┊ │
453 * ╭───────────────────────╯
454 * │ ┊
455 * ↓ ┊
456 * Z ┊
457 * ┊
458 *
459 * During the preparatory collection we promote Y to the nonmoving heap but
460 * fail to promote X. Since the failed_to_evac field is conservative (being set
461 * if *any* of the fields are not promoted), this gives us:
462 *
463 * Non-moving heap ┊ Moving heap
464 * gen 1 ┊ gen 0
465 * ──────────────────────┼────────────────────────────────
466 * ┊
467 * MVAR A ────────────────→ X
468 * (dirty) ┊
469 * │ ┊
470 * │ ┊
471 * ↓ ┊
472 * Y ┊
473 * │ ┊
474 * │ ┊
475 * ↓ ┊
476 * Z ┊
477 * ┊
478 *
479 * This is bad. When we resume mutation a mutator may mutate MVAR A; since it's
480 * already dirty we would fail to add Y to the update remembered set, breaking the
481 * snapshot invariant and potentially losing track of the liveness of Z.
482 *
483 * To avoid this, nonmovingScavengeOne eagerly pushes the values of the
484 * fields of all objects which it fails to evacuate (e.g. MVAR A) to the update
485 * remembered set during the preparatory GC. This allows us to safely skip the
486 * non-moving write barrier without jeopardizing the snapshot invariant.
487 *
488 */
489
490 memcount nonmoving_live_words = 0;
491
492 #if defined(THREADED_RTS)
493 static void* nonmovingConcurrentMark(void *mark_queue);
494 #endif
495 static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads);
496
497 static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block_size)
498 {
499 bdescr *bd = Bdescr((P_) seg);
500 seg->link = NULL;
501 seg->todo_link = NULL;
502 seg->next_free = 0;
503 bd->nonmoving_segment.log_block_size = log_block_size;
504 bd->nonmoving_segment.next_free_snap = 0;
505 bd->u.scan = nonmovingSegmentGetBlock(seg, 0);
506 nonmovingClearBitmap(seg);
507 }
508
509 // Add a segment to the free list.
510 void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
511 {
512 // See Note [Live data accounting in nonmoving collector].
513 if (nonmovingHeap.n_free > NONMOVING_MAX_FREE) {
514 bdescr *bd = Bdescr((StgPtr) seg);
515 ACQUIRE_SM_LOCK;
516 ASSERT(oldest_gen->n_blocks >= bd->blocks);
517 ASSERT(oldest_gen->n_words >= BLOCK_SIZE_W * bd->blocks);
518 oldest_gen->n_blocks -= bd->blocks;
519 oldest_gen->n_words -= BLOCK_SIZE_W * bd->blocks;
520 freeGroup(bd);
521 RELEASE_SM_LOCK;
522 return;
523 }
524
525 while (true) {
526 struct NonmovingSegment *old = nonmovingHeap.free;
527 seg->link = old;
528 if (cas((StgVolatilePtr) &nonmovingHeap.free, (StgWord) old, (StgWord) seg) == (StgWord) old)
529 break;
530 }
531 __sync_add_and_fetch(&nonmovingHeap.n_free, 1);
532 }
533
534 static struct NonmovingSegment *nonmovingPopFreeSegment(void)
535 {
536 while (true) {
537 struct NonmovingSegment *seg = nonmovingHeap.free;
538 if (seg == NULL) {
539 return NULL;
540 }
541 if (cas((StgVolatilePtr) &nonmovingHeap.free,
542 (StgWord) seg,
543 (StgWord) seg->link) == (StgWord) seg) {
544 __sync_sub_and_fetch(&nonmovingHeap.n_free, 1);
545 return seg;
546 }
547 }
548 }
549
550 unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size)
551 {
552 // We compute the overwhelmingly common size cases directly to avoid a very
553 // expensive integer division.
554 switch (log_block_size) {
555 case 3: return nonmovingBlockCount(3);
556 case 4: return nonmovingBlockCount(4);
557 case 5: return nonmovingBlockCount(5);
558 case 6: return nonmovingBlockCount(6);
559 case 7: return nonmovingBlockCount(7);
560 default: return nonmovingBlockCount(log_block_size);
561 }
562 }
563
564 /*
565 * Request a fresh segment from the free segment list or allocate one on the
566 * given node.
567 *
568 * Caller must hold SM_MUTEX (although we take the gc_alloc_block_sync spinlock
569 * under the assumption that we are in a GC context).
570 */
571 static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
572 {
573 // First try taking something off of the free list
574 struct NonmovingSegment *ret;
575 ret = nonmovingPopFreeSegment();
576
577 // Nothing in the free list, allocate a new segment...
578 if (ret == NULL) {
579 // Take gc spinlock: another thread may be scavenging a moving
580 // generation and call `todo_block_full`
581 ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
582 bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
583 // See Note [Live data accounting in nonmoving collector].
584 oldest_gen->n_blocks += bd->blocks;
585 oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks;
586 RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
587
588 for (StgWord32 i = 0; i < bd->blocks; ++i) {
589 initBdescr(&bd[i], oldest_gen, oldest_gen);
590 bd[i].flags = BF_NONMOVING;
591 }
592 ret = (struct NonmovingSegment *)bd->start;
593 }
594
595 // Check alignment
596 ASSERT(((uintptr_t)ret % NONMOVING_SEGMENT_SIZE) == 0);
597 return ret;
598 }
599
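// Compute the ceiling of log2(x). Only valid for x > 1: for x == 1 the
// argument of __builtin_clzl below would be zero, which is undefined.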
600 static inline unsigned long log2_ceil(unsigned long x)
601 {
602 return (sizeof(unsigned long)*8) - __builtin_clzl(x-1);
603 }
604
605 // Advance a segment's next_free pointer. Returns true if the segment is full.
606 static bool advance_next_free(struct NonmovingSegment *seg, const unsigned int blk_count)
607 {
608 const uint8_t *bitmap = seg->bitmap;
609 ASSERT(blk_count == nonmovingSegmentBlockCount(seg));
610 #if defined(NAIVE_ADVANCE_FREE)
611 // reference implementation
612 for (unsigned int i = seg->next_free+1; i < blk_count; i++) {
613 if (!bitmap[i]) {
614 seg->next_free = i;
615 return false;
616 }
617 }
618 seg->next_free = blk_count;
619 return true;
620 #else
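    // Find the next clear byte in the bitmap (i.e. the next unallocated
    // block) with memchr rather than the naive loop above.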
621 const uint8_t *c = memchr(&bitmap[seg->next_free+1], 0, blk_count - seg->next_free - 1);
622 if (c == NULL) {
623 seg->next_free = blk_count;
624 return true;
625 } else {
626 seg->next_free = c - bitmap;
627 return false;
628 }
629 #endif
630 }
631
632 static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *alloca)
633 {
634 while (true) {
635 struct NonmovingSegment *seg = alloca->active;
636 if (seg == NULL) {
637 return NULL;
638 }
639 if (cas((StgVolatilePtr) &alloca->active,
640 (StgWord) seg,
641 (StgWord) seg->link) == (StgWord) seg) {
642 return seg;
643 }
644 }
645 }
646
647 /* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words */
648 GNUC_ATTR_HOT
649 void *nonmovingAllocate(Capability *cap, StgWord sz)
650 {
651 unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
652 unsigned int block_count = nonmovingBlockCountFromSize(log_block_size);
653
654 // The max we ever allocate is 3276 bytes (anything larger is a large
655 // object and not moved) which is covered by allocator 9.
656 ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
657
658 struct NonmovingAllocator *alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
659
660 // Allocate into current segment
661 struct NonmovingSegment *current = alloca->current[cap->no];
662 ASSERT(current); // current is never NULL
663 void *ret = nonmovingSegmentGetBlock_(current, log_block_size, current->next_free);
664 ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment
665
666 // Advance the current segment's next_free or allocate a new segment if full
667 bool full = advance_next_free(current, block_count);
668 if (full) {
669         // Current segment is full: update the live data estimate, link it to
670         // the filled list, and take an active segment if one exists; otherwise
671         // allocate a new segment.
672
673 // Update live data estimate.
674 // See Note [Live data accounting in nonmoving collector].
675 unsigned int new_blocks = block_count - nonmovingSegmentInfo(current)->next_free_snap;
676 unsigned int block_size = 1 << log_block_size;
677 atomic_inc(&oldest_gen->live_estimate, new_blocks * block_size / sizeof(W_));
678
679 // push the current segment to the filled list
680 nonmovingPushFilledSegment(current);
681
682 // first look for a new segment in the active list
683 struct NonmovingSegment *new_current = pop_active_segment(alloca);
684
685 // there are no active segments, allocate new segment
686 if (new_current == NULL) {
687 new_current = nonmovingAllocSegment(cap->node);
688 nonmovingInitSegment(new_current, log_block_size);
689 }
690
691 // make it current
692 new_current->link = NULL;
693 alloca->current[cap->no] = new_current;
694 }
695
696 return ret;
697 }
698
699 /* Allocate a nonmovingAllocator */
700 static struct NonmovingAllocator *alloc_nonmoving_allocator(uint32_t n_caps)
701 {
702 size_t allocator_sz =
703 sizeof(struct NonmovingAllocator) +
704 sizeof(void*) * n_caps; // current segment pointer for each capability
705 struct NonmovingAllocator *alloc =
706 stgMallocBytes(allocator_sz, "nonmovingInit");
707 memset(alloc, 0, allocator_sz);
708 return alloc;
709 }
710
711 static void free_nonmoving_allocator(struct NonmovingAllocator *alloc)
712 {
713 stgFree(alloc);
714 }
715
716 void nonmovingInit(void)
717 {
718 if (! RtsFlags.GcFlags.useNonmoving) return;
719 #if defined(THREADED_RTS)
720 initMutex(&nonmoving_collection_mutex);
721 initCondition(&concurrent_coll_finished);
722 initMutex(&concurrent_coll_finished_lock);
723 #endif
724 for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
725 nonmovingHeap.allocators[i] = alloc_nonmoving_allocator(n_capabilities);
726 }
727 nonmovingMarkInitUpdRemSet();
728 }
729
730 // Stop any nonmoving collection in preparation for RTS shutdown.
731 void nonmovingStop(void)
732 {
733 if (! RtsFlags.GcFlags.useNonmoving) return;
734 #if defined(THREADED_RTS)
735 if (mark_thread) {
736 debugTrace(DEBUG_nonmoving_gc,
737 "waiting for nonmoving collector thread to terminate");
738 ACQUIRE_LOCK(&concurrent_coll_finished_lock);
739 waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock);
740 }
741 #endif
742 }
743
744 void nonmovingExit(void)
745 {
746 if (! RtsFlags.GcFlags.useNonmoving) return;
747
748 // First make sure collector is stopped before we tear things down.
749 nonmovingStop();
750
751 #if defined(THREADED_RTS)
752 closeMutex(&concurrent_coll_finished_lock);
753 closeCondition(&concurrent_coll_finished);
754 closeMutex(&nonmoving_collection_mutex);
755 #endif
756
757 for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
758 free_nonmoving_allocator(nonmovingHeap.allocators[i]);
759 }
760 }
761
762 /*
763 * Assumes that no garbage collector or mutator threads are running, so that
764 * the nonmoving_allocators can be resized safely.
765 *
766 * Must hold sm_mutex.
767 */
768 void nonmovingAddCapabilities(uint32_t new_n_caps)
769 {
770 unsigned int old_n_caps = nonmovingHeap.n_caps;
771 struct NonmovingAllocator **allocs = nonmovingHeap.allocators;
772
773 for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
774 struct NonmovingAllocator *old = allocs[i];
775 allocs[i] = alloc_nonmoving_allocator(new_n_caps);
776
777 // Copy the old state
778 allocs[i]->filled = old->filled;
779 allocs[i]->active = old->active;
780 for (unsigned int j = 0; j < old_n_caps; j++) {
781 allocs[i]->current[j] = old->current[j];
782 }
783 stgFree(old);
784
785 // Initialize current segments for the new capabilities
786 for (unsigned int j = old_n_caps; j < new_n_caps; j++) {
787 allocs[i]->current[j] = nonmovingAllocSegment(capabilities[j]->node);
788 nonmovingInitSegment(allocs[i]->current[j], NONMOVING_ALLOCA0 + i);
789 allocs[i]->current[j]->link = NULL;
790 }
791 }
792 nonmovingHeap.n_caps = new_n_caps;
793 }
794
795 void nonmovingClearBitmap(struct NonmovingSegment *seg)
796 {
797 unsigned int n = nonmovingSegmentBlockCount(seg);
798 memset(seg->bitmap, 0, n);
799 }
800
801 /* Prepare the heap bitmaps and snapshot metadata for a mark */
802 static void nonmovingPrepareMark(void)
803 {
804 // See Note [Static objects under the nonmoving collector].
805 prev_static_flag = static_flag;
806 static_flag =
807 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
808
809 // Should have been cleared by the last sweep
810 ASSERT(nonmovingHeap.sweep_list == NULL);
811
812 nonmovingBumpEpoch();
813 for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
814 struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
815
816 // Update current segments' snapshot pointers
817 for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) {
818 struct NonmovingSegment *seg = alloca->current[cap_n];
819 nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
820 }
821
822 // Save the filled segments for later processing during the concurrent
823 // mark phase.
824 alloca->saved_filled = alloca->filled;
825 alloca->filled = NULL;
826
827 // N.B. It's not necessary to update snapshot pointers of active segments;
828 // they were set after they were swept and haven't seen any allocation
829 // since.
830 }
831
832 // Clear large object bits of existing large objects
833 for (bdescr *bd = nonmoving_large_objects; bd; bd = bd->link) {
834 bd->flags &= ~BF_MARKED;
835 }
836
837 // Add newly promoted large objects and clear mark bits
838 bdescr *next;
839 ASSERT(oldest_gen->scavenged_large_objects == NULL);
840 for (bdescr *bd = oldest_gen->large_objects; bd; bd = next) {
841 next = bd->link;
842 bd->flags |= BF_NONMOVING_SWEEPING;
843 bd->flags &= ~BF_MARKED;
844 dbl_link_onto(bd, &nonmoving_large_objects);
845 }
846 n_nonmoving_large_blocks += oldest_gen->n_large_blocks;
847 oldest_gen->large_objects = NULL;
848 oldest_gen->n_large_words = 0;
849 oldest_gen->n_large_blocks = 0;
850 nonmoving_live_words = 0;
851
852 // Clear compact object mark bits
853 for (bdescr *bd = nonmoving_compact_objects; bd; bd = bd->link) {
854 bd->flags &= ~BF_MARKED;
855 }
856
857 // Move new compact objects from younger generations to nonmoving_compact_objects
858 for (bdescr *bd = oldest_gen->compact_objects; bd; bd = next) {
859 next = bd->link;
860 bd->flags |= BF_NONMOVING_SWEEPING;
861 bd->flags &= ~BF_MARKED;
862 dbl_link_onto(bd, &nonmoving_compact_objects);
863 }
864 n_nonmoving_compact_blocks += oldest_gen->n_compact_blocks;
865 oldest_gen->n_compact_blocks = 0;
866 oldest_gen->compact_objects = NULL;
867 // TODO (osa): what about "in import" stuff??
868
869
870
871 #if defined(DEBUG)
872 debug_caf_list_snapshot = debug_caf_list;
873 debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
874 #endif
875 }
876
877 // Mark weak pointers in the non-moving heap. They'll either end up in
878 // dead_weak_ptr_list or stay in weak_ptr_list. Either way they need to be kept
879 // during sweep. See `MarkWeak.c:markWeakPtrList` for the moving heap variant
880 // of this.
881 static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_ptr_list)
882 {
883 for (StgWeak *w = oldest_gen->weak_ptr_list; w; w = w->link) {
884 markQueuePushClosure_(mark_queue, (StgClosure*)w);
885 // Do not mark finalizers and values here, those fields will be marked
886 // in `nonmovingMarkDeadWeaks` (for dead weaks) or
887 // `nonmovingTidyWeaks` (for live weaks)
888 }
889
890 // We need to mark dead_weak_ptr_list too. This is subtle:
891 //
892 // - By the beginning of this GC we evacuated all weaks to the non-moving
893 // heap (in `markWeakPtrList`)
894 //
895 // - During the scavenging of the moving heap we discovered that some of
896 // those weaks are dead and moved them to `dead_weak_ptr_list`. Note that
897 // because of the fact above _all weaks_ are in the non-moving heap at
898 // this point.
899 //
900 // - So, to be able to traverse `dead_weak_ptr_list` and run finalizers we
901 // need to mark it.
902 for (StgWeak *w = dead_weak_ptr_list; w; w = w->link) {
903 markQueuePushClosure_(mark_queue, (StgClosure*)w);
904 nonmovingMarkDeadWeak(mark_queue, w);
905 }
906 }
907
908 void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
909 {
910 #if defined(THREADED_RTS)
911 // We can't start a new collection until the old one has finished
912     // We also don't run during the final GC
913 if (concurrent_coll_running || sched_state > SCHED_RUNNING) {
914 return;
915 }
916 #endif
917
918 trace(TRACE_nonmoving_gc, "Starting nonmoving GC preparation");
919 resizeGenerations();
920
921 nonmovingPrepareMark();
922
923 // N.B. These should have been cleared at the end of the last sweep.
924 ASSERT(nonmoving_marked_large_objects == NULL);
925 ASSERT(n_nonmoving_marked_large_blocks == 0);
926 ASSERT(nonmoving_marked_compact_objects == NULL);
927 ASSERT(n_nonmoving_marked_compact_blocks == 0);
928
929 MarkQueue *mark_queue = stgMallocBytes(sizeof(MarkQueue), "mark queue");
930 initMarkQueue(mark_queue);
931 current_mark_queue = mark_queue;
932
933 // Mark roots
934 trace(TRACE_nonmoving_gc, "Marking roots for nonmoving GC");
935 markCAFs((evac_fn)markQueueAddRoot, mark_queue);
936 for (unsigned int n = 0; n < n_capabilities; ++n) {
937 markCapability((evac_fn)markQueueAddRoot, mark_queue,
938 capabilities[n], true/*don't mark sparks*/);
939 }
940 markScheduler((evac_fn)markQueueAddRoot, mark_queue);
941 nonmovingMarkWeakPtrList(mark_queue, *dead_weaks);
942 markStablePtrTable((evac_fn)markQueueAddRoot, mark_queue);
943
944 // Mark threads resurrected during moving heap scavenging
945 for (StgTSO *tso = *resurrected_threads; tso != END_TSO_QUEUE; tso = tso->global_link) {
946 markQueuePushClosure_(mark_queue, (StgClosure*)tso);
947 }
948 trace(TRACE_nonmoving_gc, "Finished marking roots for nonmoving GC");
949
950 // Roots marked, mark threads and weak pointers
951
952 // At this point all threads are moved to threads list (from old_threads)
953 // and all weaks are moved to weak_ptr_list (from old_weak_ptr_list) by
954 // the previous scavenge step, so we need to move them to "old" lists
955 // again.
956
957 // Fine to override old_threads because any live or resurrected threads are
958 // moved to threads or resurrected_threads lists.
959 ASSERT(oldest_gen->old_threads == END_TSO_QUEUE);
960 ASSERT(nonmoving_old_threads == END_TSO_QUEUE);
961 nonmoving_old_threads = oldest_gen->threads;
962 oldest_gen->threads = END_TSO_QUEUE;
963
964 // Make sure we don't lose any weak ptrs here. Weaks in old_weak_ptr_list
965 // will either be moved to `dead_weaks` (if dead) or `weak_ptr_list` (if
966 // alive).
967 ASSERT(oldest_gen->old_weak_ptr_list == NULL);
968 ASSERT(nonmoving_old_weak_ptr_list == NULL);
969 nonmoving_old_weak_ptr_list = oldest_gen->weak_ptr_list;
970 oldest_gen->weak_ptr_list = NULL;
971 trace(TRACE_nonmoving_gc, "Finished nonmoving GC preparation");
972
973 // We are now safe to start concurrent marking
974
975 // Note that in concurrent mark we can't use dead_weaks and
976 // resurrected_threads from the preparation to add new weaks and threads as
977 // that would cause races between minor collection and mark. So we only pass
978     // those lists to the mark function in the sequential case. In the
979     // concurrent case we allocate fresh lists.
980
981 #if defined(THREADED_RTS)
982 // If we're interrupting or shutting down, do not let this capability go and
983 // run a STW collection. Reason: we won't be able to acquire this capability
984 // again for the sync if we let it go, because it'll immediately start doing
985     // a major GC, because that's what we do when exiting the scheduler (see
986 // exitScheduler()).
987 if (sched_state == SCHED_RUNNING) {
988 concurrent_coll_running = true;
989 nonmoving_write_barrier_enabled = true;
990 debugTrace(DEBUG_nonmoving_gc, "Starting concurrent mark thread");
991 if (createOSThread(&mark_thread, "non-moving mark thread",
992 nonmovingConcurrentMark, mark_queue) != 0) {
993 barf("nonmovingCollect: failed to spawn mark thread: %s", strerror(errno));
994 }
995 } else {
996 nonmovingConcurrentMark(mark_queue);
997 }
998 #else
999 // Use the weak and thread lists from the preparation for any new weaks and
1000 // threads found to be dead in mark.
1001 nonmovingMark_(mark_queue, dead_weaks, resurrected_threads);
1002 #endif
1003 }
1004
1005 /* Mark the mark queue, threads, and weak pointers until no more weaks have
1006  * been resuscitated
1007 */
1008 static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue)
1009 {
1010 while (true) {
1011 // Propagate marks
1012 nonmovingMark(mark_queue);
1013
1014 // Tidy threads and weaks
1015 nonmovingTidyThreads();
1016
1017 if (! nonmovingTidyWeaks(mark_queue))
1018 return;
1019 }
1020 }
1021
1022 #if defined(THREADED_RTS)
1023 static void* nonmovingConcurrentMark(void *data)
1024 {
1025 MarkQueue *mark_queue = (MarkQueue*)data;
1026 StgWeak *dead_weaks = NULL;
1027 StgTSO *resurrected_threads = (StgTSO*)&stg_END_TSO_QUEUE_closure;
1028 nonmovingMark_(mark_queue, &dead_weaks, &resurrected_threads);
1029 return NULL;
1030 }
1031
1032 // TODO: Not sure where to put this function.
1033 // Append w2 to the end of w1.
1034 static void appendWeakList( StgWeak **w1, StgWeak *w2 )
1035 {
1036 while (*w1) {
1037 w1 = &(*w1)->link;
1038 }
1039 *w1 = w2;
1040 }
1041 #endif
1042
1043 static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads)
1044 {
1045 ACQUIRE_LOCK(&nonmoving_collection_mutex);
1046 debugTrace(DEBUG_nonmoving_gc, "Starting mark...");
1047 stat_startNonmovingGc();
1048
1049 // Walk the list of filled segments that we collected during preparation,
1050     // update their snapshot pointers, and move them to the sweep list.
1051 for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
1052 struct NonmovingSegment *filled = nonmovingHeap.allocators[alloca_idx]->saved_filled;
1053 uint32_t n_filled = 0;
1054 if (filled) {
1055 struct NonmovingSegment *seg = filled;
1056 while (true) {
1057 // Set snapshot
1058 nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
1059 n_filled++;
1060 if (seg->link)
1061 seg = seg->link;
1062 else
1063 break;
1064 }
1065 // add filled segments to sweep_list
1066 seg->link = nonmovingHeap.sweep_list;
1067 nonmovingHeap.sweep_list = filled;
1068 }
1069 }
1070
1071 // Do concurrent marking; most of the heap will get marked here.
1072 nonmovingMarkThreadsWeaks(mark_queue);
1073
1074 #if defined(THREADED_RTS)
1075 Task *task = newBoundTask();
1076
1077     // If at this point we've decided to exit then just return
1078 if (sched_state > SCHED_RUNNING) {
1079 // Note that we break our invariants here and leave segments in
1080 // nonmovingHeap.sweep_list, don't free nonmoving_large_objects etc.
1081 // However because we won't be running mark-sweep in the final GC this
1082 // is OK.
1083
1084         // This is an RTS shutdown so we need to move our copy (snapshot) of
1085         // weaks (nonmoving_old_weak_ptr_list and nonmoving_weak_ptr_list) to
1086         // oldest_gen->weak_ptr_list to be able to run C finalizers in hs_exit_.
1087         // Note that there may be more weaks added to oldest_gen->weak_ptr_list
1088         // since we started mark, so we need to append our list to the tail of
1089         // oldest_gen->weak_ptr_list.
1090 appendWeakList(&nonmoving_old_weak_ptr_list, nonmoving_weak_ptr_list);
1091 appendWeakList(&oldest_gen->weak_ptr_list, nonmoving_old_weak_ptr_list);
1092 // These lists won't be used again so this is not necessary, but still
1093 nonmoving_old_weak_ptr_list = NULL;
1094 nonmoving_weak_ptr_list = NULL;
1095
1096 goto finish;
1097 }
1098
1099 // We're still running, request a sync
1100 nonmovingBeginFlush(task);
1101
1102 bool all_caps_syncd;
1103 do {
1104 all_caps_syncd = nonmovingWaitForFlush();
1105 nonmovingMarkThreadsWeaks(mark_queue);
1106 } while (!all_caps_syncd);
1107 #endif
1108
1109 nonmovingResurrectThreads(mark_queue, resurrected_threads);
1110
1111 // No more resurrecting threads after this point
1112
1113 // Do last marking of weak pointers
1114 while (true) {
1115 // Propagate marks
1116 nonmovingMark(mark_queue);
1117
1118 if (!nonmovingTidyWeaks(mark_queue))
1119 break;
1120 }
1121
1122 nonmovingMarkDeadWeaks(mark_queue, dead_weaks);
1123
1124 // Propagate marks
1125 nonmovingMark(mark_queue);
1126
1127 // Now remove all dead objects from the mut_list to ensure that a younger
1128 // generation collection doesn't attempt to look at them after we've swept.
1129 nonmovingSweepMutLists();
1130
1131 debugTrace(DEBUG_nonmoving_gc,
1132 "Done marking, resurrecting threads before releasing capabilities");
1133
1134
1135 // Schedule finalizers and resurrect threads
1136 #if defined(THREADED_RTS)
1137 // Just pick a random capability. Not sure if this is a good idea -- we use
1138 // only one capability for all finalizers.
1139 scheduleFinalizers(capabilities[0], *dead_weaks);
1140     // Note that this mutates the heap and causes write barriers to run.
1141 // See Note [Unintentional marking in resurrectThreads] in NonMovingMark.c
1142 // for how we deal with this.
1143 resurrectThreads(*resurrected_threads);
1144 #endif
1145
1146 #if defined(DEBUG)
1147 // Zap CAFs that we will sweep
1148 nonmovingGcCafs();
1149 #endif
1150
1151 ASSERT(mark_queue->top->head == 0);
1152 ASSERT(mark_queue->blocks->link == NULL);
1153
1154 // Update oldest_gen thread and weak lists
1155 // Note that we need to append these lists as a concurrent minor GC may have
1156 // added stuff to them while we're doing mark-sweep concurrently
1157 {
1158 StgTSO **threads = &oldest_gen->threads;
1159 while (*threads != END_TSO_QUEUE) {
1160 threads = &(*threads)->global_link;
1161 }
1162 *threads = nonmoving_threads;
1163 nonmoving_threads = END_TSO_QUEUE;
1164 nonmoving_old_threads = END_TSO_QUEUE;
1165 }
1166
1167 {
1168 StgWeak **weaks = &oldest_gen->weak_ptr_list;
1169 while (*weaks) {
1170 weaks = &(*weaks)->link;
1171 }
1172 *weaks = nonmoving_weak_ptr_list;
1173 nonmoving_weak_ptr_list = NULL;
1174 nonmoving_old_weak_ptr_list = NULL;
1175 }
1176
1177 // Prune spark lists
1178 // See Note [Spark management under the nonmoving collector].
1179 #if defined(THREADED_RTS)
1180 for (uint32_t n = 0; n < n_capabilities; n++) {
1181 pruneSparkQueue(true, capabilities[n]);
1182 }
1183 #endif
1184
1185 // Everything has been marked; allow the mutators to proceed
1186 #if defined(THREADED_RTS)
1187 nonmoving_write_barrier_enabled = false;
1188 nonmovingFinishFlush(task);
1189 #endif
1190
1191 current_mark_queue = NULL;
1192 freeMarkQueue(mark_queue);
1193 stgFree(mark_queue);
1194
1195 oldest_gen->live_estimate = nonmoving_live_words;
1196 oldest_gen->n_old_blocks = 0;
1197 resizeGenerations();
1198
1199 /****************************************************
1200 * Sweep
1201 ****************************************************/
1202
1203 traceConcSweepBegin();
1204
1205     // Because we can't mark large object blocks (no room for a mark bit), we
1206     // collect them in a map in mark_queue and pass it here to sweep large
1207     // objects.
1208 nonmovingSweepLargeObjects();
1209 nonmovingSweepCompactObjects();
1210 nonmovingSweepStableNameTable();
1211
1212 nonmovingSweep();
1213 ASSERT(nonmovingHeap.sweep_list == NULL);
1214 debugTrace(DEBUG_nonmoving_gc, "Finished sweeping.");
1215 traceConcSweepEnd();
1216 #if defined(DEBUG)
1217 if (RtsFlags.DebugFlags.nonmoving_gc)
1218 nonmovingPrintAllocatorCensus();
1219 #endif
1220 #if defined(TRACING)
1221 if (RtsFlags.TraceFlags.nonmoving_gc)
1222 nonmovingTraceAllocatorCensus();
1223 #endif
1224
1225 // TODO: Remainder of things done by GarbageCollect (update stats)
1226
1227 #if defined(THREADED_RTS)
1228 finish:
1229 boundTaskExiting(task);
1230
1231 // We are done...
1232 mark_thread = 0;
1233 stat_endNonmovingGc();
1234
1235 // Signal that the concurrent collection is finished, allowing the next
1236 // non-moving collection to proceed
1237 concurrent_coll_running = false;
1238 signalCondition(&concurrent_coll_finished);
1239 RELEASE_LOCK(&nonmoving_collection_mutex);
1240 #endif
1241 }
1242
1243 #if defined(DEBUG)
1244
1245 // Use this with caution: this doesn't work correctly during the scavenge phase
1246 // when we're doing parallel scavenging. Use it in the mark phase or later (where
1247 // we no longer allocate).
1248 void assert_in_nonmoving_heap(StgPtr p)
1249 {
1250 if (!HEAP_ALLOCED_GC(p))
1251 return;
1252
1253 bdescr *bd = Bdescr(p);
1254 if (bd->flags & BF_LARGE) {
1255         // It should be in a capability (if it's not filled yet) or in the non-moving heap
1256 for (uint32_t cap = 0; cap < n_capabilities; ++cap) {
1257 if (bd == capabilities[cap]->pinned_object_block) {
1258 return;
1259 }
1260 }
1261 ASSERT(bd->flags & BF_NONMOVING);
1262 return;
1263 }
1264
1265 // Search snapshot segments
1266 for (struct NonmovingSegment *seg = nonmovingHeap.sweep_list; seg; seg = seg->link) {
1267 if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
1268 return;
1269 }
1270 }
1271
1272 for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
1273 struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
1274 // Search current segments
1275 for (uint32_t cap_idx = 0; cap_idx < n_capabilities; ++cap_idx) {
1276 struct NonmovingSegment *seg = alloca->current[cap_idx];
1277 if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
1278 return;
1279 }
1280 }
1281
1282 // Search active segments
1283 int seg_idx = 0;
1284 struct NonmovingSegment *seg = alloca->active;
1285 while (seg) {
1286 if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
1287 return;
1288 }
1289 seg_idx++;
1290 seg = seg->link;
1291 }
1292
1293 // Search filled segments
1294 seg_idx = 0;
1295 seg = alloca->filled;
1296 while (seg) {
1297 if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
1298 return;
1299 }
1300 seg_idx++;
1301 seg = seg->link;
1302 }
1303 }
1304
1305 // We don't search free segments as they're unused
1306
1307 barf("%p is not in nonmoving heap\n", (void*)p);
1308 }
1309
1310 void nonmovingPrintSegment(struct NonmovingSegment *seg)
1311 {
1312 int num_blocks = nonmovingSegmentBlockCount(seg);
1313 uint8_t log_block_size = nonmovingSegmentLogBlockSize(seg);
1314
1315 debugBelch("Segment with %d blocks of size 2^%d (%d bytes, %u words, scan: %p)\n",
1316 num_blocks,
1317 log_block_size,
1318 1 << log_block_size,
1319 (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << log_block_size),
1320 (void*)Bdescr((P_)seg)->u.scan);
1321
1322 for (nonmoving_block_idx p_idx = 0; p_idx < seg->next_free; ++p_idx) {
1323 StgClosure *p = (StgClosure*)nonmovingSegmentGetBlock(seg, p_idx);
1324 if (nonmovingGetMark(seg, p_idx) != 0) {
1325 debugBelch("%d (%p)* :\t", p_idx, (void*)p);
1326 } else {
1327 debugBelch("%d (%p) :\t", p_idx, (void*)p);
1328 }
1329 printClosure(p);
1330 }
1331
1332 debugBelch("End of segment\n\n");
1333 }
1334
1335 void nonmovingPrintAllocator(struct NonmovingAllocator *alloc)
1336 {
1337 debugBelch("Allocator at %p\n", (void*)alloc);
1338 debugBelch("Filled segments:\n");
1339 for (struct NonmovingSegment *seg = alloc->filled; seg != NULL; seg = seg->link) {
1340 debugBelch("%p ", (void*)seg);
1341 }
1342 debugBelch("\nActive segments:\n");
1343 for (struct NonmovingSegment *seg = alloc->active; seg != NULL; seg = seg->link) {
1344 debugBelch("%p ", (void*)seg);
1345 }
1346 debugBelch("\nCurrent segments:\n");
1347 for (uint32_t i = 0; i < n_capabilities; ++i) {
1348 debugBelch("%p ", alloc->current[i]);
1349 }
1350 debugBelch("\n");
1351 }
1352
1353 void locate_object(P_ obj)
1354 {
1355 // Search allocators
1356 for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
1357 struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
1358 for (uint32_t cap = 0; cap < n_capabilities; ++cap) {
1359 struct NonmovingSegment *seg = alloca->current[cap];
1360 if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
1361 debugBelch("%p is in current segment of capability %d of allocator %d at %p\n", obj, cap, alloca_idx, (void*)seg);
1362 return;
1363 }
1364 }
1365 int seg_idx = 0;
1366 struct NonmovingSegment *seg = alloca->active;
1367 while (seg) {
1368 if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
1369 debugBelch("%p is in active segment %d of allocator %d at %p\n", obj, seg_idx, alloca_idx, (void*)seg);
1370 return;
1371 }
1372 seg_idx++;
1373 seg = seg->link;
1374 }
1375
1376 seg_idx = 0;
1377 seg = alloca->filled;
1378 while (seg) {
1379 if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
1380 debugBelch("%p is in filled segment %d of allocator %d at %p\n", obj, seg_idx, alloca_idx, (void*)seg);
1381 return;
1382 }
1383 seg_idx++;
1384 seg = seg->link;
1385 }
1386 }
1387
1388 struct NonmovingSegment *seg = nonmovingHeap.free;
1389 int seg_idx = 0;
1390 while (seg) {
1391 if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
1392 debugBelch("%p is in free segment %d at %p\n", obj, seg_idx, (void*)seg);
1393 return;
1394 }
1395 seg_idx++;
1396 seg = seg->link;
1397 }
1398
1399 // Search nurseries
1400 for (uint32_t nursery_idx = 0; nursery_idx < n_nurseries; ++nursery_idx) {
1401 for (bdescr* nursery_block = nurseries[nursery_idx].blocks; nursery_block; nursery_block = nursery_block->link) {
1402 if (obj >= nursery_block->start && obj <= nursery_block->start + nursery_block->blocks*BLOCK_SIZE_W) {
1403 debugBelch("%p is in nursery %d\n", obj, nursery_idx);
1404 return;
1405 }
1406 }
1407 }
1408
1409 // Search generations
1410 for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++g) {
1411 generation *gen = &generations[g];
1412 for (bdescr *blk = gen->blocks; blk; blk = blk->link) {
1413 if (obj >= blk->start && obj < blk->free) {
1414 debugBelch("%p is in generation %" FMT_Word32 " blocks\n", obj, g);
1415 return;
1416 }
1417 }
1418 for (bdescr *blk = gen->old_blocks; blk; blk = blk->link) {
1419 if (obj >= blk->start && obj < blk->free) {
1420 debugBelch("%p is in generation %" FMT_Word32 " old blocks\n", obj, g);
1421 return;
1422 }
1423 }
1424 }
1425
1426 // Search large objects
1427 for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++g) {
1428 generation *gen = &generations[g];
1429 for (bdescr *large_block = gen->large_objects; large_block; large_block = large_block->link) {
1430 if ((P_)large_block->start == obj) {
1431 debugBelch("%p is in large blocks of generation %d\n", obj, g);
1432 return;
1433 }
1434 }
1435 }
1436
1437 for (bdescr *large_block = nonmoving_large_objects; large_block; large_block = large_block->link) {
1438 if ((P_)large_block->start == obj) {
1439 debugBelch("%p is in nonmoving_large_objects\n", obj);
1440 return;
1441 }
1442 }
1443
1444 for (bdescr *large_block = nonmoving_marked_large_objects; large_block; large_block = large_block->link) {
1445 if ((P_)large_block->start == obj) {
1446 debugBelch("%p is in nonmoving_marked_large_objects\n", obj);
1447 return;
1448 }
1449 }
1450
1451 // Search workspaces FIXME only works in non-threaded runtime
1452 #if !defined(THREADED_RTS)
1453 for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++ g) {
1454 gen_workspace *ws = &gct->gens[g];
1455 for (bdescr *blk = ws->todo_bd; blk; blk = blk->link) {
1456 if (obj >= blk->start && obj < blk->free) {
1457 debugBelch("%p is in generation %" FMT_Word32 " todo bds\n", obj, g);
1458 return;
1459 }
1460 }
1461 for (bdescr *blk = ws->scavd_list; blk; blk = blk->link) {
1462 if (obj >= blk->start && obj < blk->free) {
1463 debugBelch("%p is in generation %" FMT_Word32 " scavd bds\n", obj, g);
1464 return;
1465 }
1466 }
1467 for (bdescr *blk = ws->todo_large_objects; blk; blk = blk->link) {
1468 if (obj >= blk->start && obj < blk->free) {
1469 debugBelch("%p is in generation %" FMT_Word32 " todo large bds\n", obj, g);
1470 return;
1471 }
1472 }
1473 }
1474 #endif
1475 }
1476
1477 void nonmovingPrintSweepList()
1478 {
1479 debugBelch("==== SWEEP LIST =====\n");
1480 int i = 0;
1481 for (struct NonmovingSegment *seg = nonmovingHeap.sweep_list; seg; seg = seg->link) {
1482 debugBelch("%d: %p\n", i++, (void*)seg);
1483 }
1484 debugBelch("= END OF SWEEP LIST =\n");
1485 }
1486
1487 void check_in_mut_list(StgClosure *p)
1488 {
1489 for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) {
1490 for (bdescr *bd = capabilities[cap_n]->mut_lists[oldest_gen->no]; bd; bd = bd->link) {
1491 for (StgPtr q = bd->start; q < bd->free; ++q) {
1492 if (*((StgPtr**)q) == (StgPtr*)p) {
1493 debugBelch("Object is in mut list of cap %d: %p\n", cap_n, capabilities[cap_n]->mut_lists[oldest_gen->no]);
1494 return;
1495 }
1496 }
1497 }
1498 }
1499
1500 debugBelch("Object is not in a mut list\n");
1501 }
1502
1503 void print_block_list(bdescr* bd)
1504 {
1505 while (bd) {
1506 debugBelch("%p, ", (void*)bd);
1507 bd = bd->link;
1508 }
1509 debugBelch("\n");
1510 }
1511
1512 void print_thread_list(StgTSO* tso)
1513 {
1514 while (tso != END_TSO_QUEUE) {
1515 printClosure((StgClosure*)tso);
1516 tso = tso->global_link;
1517 }
1518 }
1519
1520 #endif
1521