1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage/gc
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Arena.h"
32 #include "Storage.h"
33 #include "RtsUtils.h"
34 #include "Apply.h"
35 #include "Updates.h"
36 #include "Stats.h"
37 #include "Schedule.h"
38 #include "Sanity.h"
39 #include "BlockAlloc.h"
40 #include "ProfHeap.h"
41 #include "Weak.h"
42 #include "Prelude.h"
43 #include "RtsSignals.h"
44 #include "STM.h"
45 #include "Trace.h"
46 #include "RetainerProfile.h"
47 #include "LdvProfile.h"
48 #include "RaiseAsync.h"
49 #include "StableName.h"
50 #include "StablePtr.h"
51 #include "CheckUnload.h"
52 #include "CNF.h"
53 #include "RtsFlags.h"
54 #include "NonMoving.h"
55
56 #include <string.h> // for memset()
57 #include <unistd.h>
58
59 /* -----------------------------------------------------------------------------
60 Global variables
61 -------------------------------------------------------------------------- */
62
63 /* STATIC OBJECT LIST.
64 *
65 * During GC:
66 * We maintain a linked list of static objects that are still live.
67 * The requirements for this list are:
68 *
69 * - we need to scan the list while adding to it, in order to
70 * scavenge all the static objects (in the same way that
71 * breadth-first scavenging works for dynamic objects).
72 *
73 * - we need to be able to tell whether an object is already on
74 * the list, to break loops.
75 *
76 * Each static object has a "static link field", which we use for
77 * linking objects on to the list. We use a stack-type list, consing
78 * objects on the front as they are added (this means that the
79 * scavenge phase is depth-first, not breadth-first, but that
80 * shouldn't matter).
81 *
82 * A separate list is kept for objects that have been scavenged
83 * already - this is so that we can zero all the marks afterwards.
84 *
85 * An object is on the list if its static link field is non-zero; this
86 * means that we have to mark the end of the list with '1', not NULL.
87 *
88 * Extra notes for generational GC:
89 *
90 * Each generation has a static object list associated with it. When
91 * collecting generations up to N, we treat the static object lists
92 * from generations > N as roots.
93 *
94 * We build up a static object list while collecting generations 0..N,
95 * which is then appended to the static object list of generation N+1.
96 *
97 * See also: Note [STATIC_LINK fields] in Storage.h.
98 */
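/* Illustrative sketch (not the actual evacuation code) of consing a static
 * object onto the pending list via its static link field, assuming a
 * hypothetical STATIC_LINK_FIELD accessor; the real code goes through
 * STATIC_LINK() and the flag bits described in Note [STATIC_LINK fields]
 * in Storage.h:
 *
 *     if (*STATIC_LINK_FIELD(p) == NULL) {        // not on any list yet
 *         *STATIC_LINK_FIELD(p) = gct->static_objects;
 *         gct->static_objects = p;                // cons onto the front
 *     }
 */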
99
100 /* Hot GC globals
101 * ~~~~~~~~~~~~~~
102 * The globals below are quite hot during GC but read-only, being initialized
103 * at the beginning of a collection. It is important that they reside in the
104 * same cache line to minimize unnecessary cache misses.
105 */
106
107 /* N is the oldest generation being collected, where the generations
108 * are numbered starting at 0. A major GC (indicated by the major_gc
109 * flag) is when we're collecting all generations. We only attempt to
110 * deal with static objects and GC CAFs when doing a major GC.
111 */
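/* For example, with the default of two generations (+RTS -G2), a collection
 * with N == 1 is a major GC, while N == 0 collects only the youngest
 * generation.
 */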
112 uint32_t N;
113 bool major_gc;
114 bool deadlock_detect_gc;
115 bool unload_mark_needed;
116
117 /* Data used for allocation area sizing.
118 */
119 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
120
121 /* Mut-list stats */
122 #if defined(DEBUG)
123 // For lack of a better option we protect mutlist_scav_stats with oldest_gen->sync
124 MutListScavStats mutlist_scav_stats;
125 #endif
126
127 /* Thread-local data for each GC thread
128 */
129 gc_thread **gc_threads = NULL;
130
131 #if !defined(THREADED_RTS)
132 // Must be aligned to 64 bytes to meet the stated 64-byte alignment of gen_workspace
133 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
134 ATTRIBUTE_ALIGNED(64);
135 #endif
136
137 /* Note [n_gc_threads]
138 This is a global variable that originally tracked the number of threads
139 participating in the current GC. Its meaning has since diverged somewhat:
140 in practice, it now takes one of the values {1, n_capabilities}. Don't be
141 tricked into thinking this means garbage collections must have 1 or
142 n_capabilities participants: idle capabilities (idle_cap[cap->no]) are included
143 in the n_gc_threads count.
144
145 Clearly this is in need of some tidying up, but for now we tread carefully. We
146 check n_gc_threads > 1 to see whether we are in a parallel or a sequential
147 collection, and we ensure n_gc_threads > 1 before iterating over gc_threads:
148
149 for (i = 0; i < n_gc_threads; i++) { foo(gc_threads[i]); }
150
151 Omitting this check has led to issues such as #19147.
152 */
153 uint32_t n_gc_threads;
154
155 // For stats:
156 static long copied; // *words* copied & scavenged during this GC
157
158 #if defined(PROF_SPIN) && defined(THREADED_RTS)
159 // spin and yield counts for the quasi-SpinLock in waitForGcThreads
160 volatile StgWord64 waitForGcThreads_spin = 0;
161 volatile StgWord64 waitForGcThreads_yield = 0;
162 volatile StgWord64 whitehole_gc_spin = 0;
163 #endif
164
165 bool work_stealing;
166
167 uint32_t static_flag = STATIC_FLAG_B;
168 uint32_t prev_static_flag = STATIC_FLAG_A;
169
170 DECLARE_GCT
171
172 /* -----------------------------------------------------------------------------
173 Static function declarations
174 -------------------------------------------------------------------------- */
175
176 static void mark_root (void *user, StgClosure **root);
177 static void prepare_collected_gen (generation *gen);
178 static void prepare_uncollected_gen (generation *gen);
179 static void init_gc_thread (gc_thread *t);
180 static void resize_nursery (void);
181 static void start_gc_threads (void);
182 static void scavenge_until_all_done (void);
183 static StgWord inc_running (void);
184 static StgWord dec_running (void);
185 static void wakeup_gc_threads (uint32_t me, bool idle_cap[]);
186 static void shutdown_gc_threads (uint32_t me, bool idle_cap[]);
187 static void collect_gct_blocks (void);
188 static void collect_pinned_object_blocks (void);
189 static void heapOverflow (void);
190
191 #if defined(DEBUG)
192 static void gcCAFs (void);
193 #endif
194
195 /* -----------------------------------------------------------------------------
196 The mark stack.
197 -------------------------------------------------------------------------- */
198
199 bdescr *mark_stack_top_bd; // topmost block in the mark stack
200 bdescr *mark_stack_bd; // current block in the mark stack
201 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
202
203
204 /* -----------------------------------------------------------------------------
205 Statistics from mut_list scavenging
206 -------------------------------------------------------------------------- */
207
208 #if defined(DEBUG)
209 void
210 zeroMutListScavStats(MutListScavStats *src)
211 {
212 memset(src, 0, sizeof(MutListScavStats));
213 }
214
215 void
216 addMutListScavStats(const MutListScavStats *src,
217 MutListScavStats *dest)
218 {
219 #define ADD_STATS(field) dest->field += src->field;
220 ADD_STATS(n_MUTVAR);
221 ADD_STATS(n_MUTARR);
222 ADD_STATS(n_MVAR);
223 ADD_STATS(n_TVAR);
224 ADD_STATS(n_TREC_CHUNK);
225 ADD_STATS(n_TVAR_WATCH_QUEUE);
226 ADD_STATS(n_TREC_HEADER);
227 ADD_STATS(n_OTHERS);
228 #undef ADD_STATS
229 }
230 #endif /* DEBUG */
231
232
233 /* -----------------------------------------------------------------------------
234 GarbageCollect: the main entry point to the garbage collector.
235
236 The collect_gen parameter is determined by calling calcNeeded().
237
238 Locks held: all capabilities are held throughout GarbageCollect().
239 -------------------------------------------------------------------------- */
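/* A minimal sketch of the call shape (argument values are hypothetical; the
 * real call sites live in the scheduler, see Schedule.c):
 *
 *     GarbageCollect(calcNeeded(force_major, NULL), // oldest gen to collect
 *                    false,                         // do_heap_census
 *                    false,                         // deadlock_detect
 *                    SYNC_GC_SEQ,                   // gc_type
 *                    cap, idle_cap);
 */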
240
241 void
242 GarbageCollect (uint32_t collect_gen,
243 const bool do_heap_census,
244 const bool deadlock_detect,
245 uint32_t gc_type USED_IF_THREADS,
246 Capability *cap,
247 bool idle_cap[])
248 {
249 bdescr *bd;
250 generation *gen;
251 StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
252 gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
253 any_work, no_work, scav_find_work;
254 #if defined(THREADED_RTS)
255 gc_thread *saved_gct;
256 #endif
257 uint32_t g, n;
258 // The time we should report our heap census as occurring at, if necessary.
259 Time mut_time = 0;
260
261 if (do_heap_census) {
262 RTSStats stats;
263 getRTSStats(&stats);
264 mut_time = stats.mutator_cpu_ns;
265 }
266
267 // necessary if we stole a callee-saves register for gct:
268 #if defined(THREADED_RTS)
269 saved_gct = gct;
270 #endif
271
272 #if defined(PROFILING)
273 CostCentreStack *save_CCS[n_capabilities];
274 #endif
275
276 ACQUIRE_SM_LOCK;
277
278 #if defined(RTS_USER_SIGNALS)
279 if (RtsFlags.MiscFlags.install_signal_handlers) {
280 // block signals
281 blockUserSignals();
282 }
283 #endif
284
285 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
286 // otherwise adjust the padding in gen_workspace.
287
288 // this is the main thread
289 SET_GCT(gc_threads[cap->no]);
290
291 // tell the stats department that we've started a GC
292 stat_startGC(cap, gct);
293
294 // Lock the StablePtr table. This prevents FFI calls manipulating
295 // the table from occurring during GC.
296 stablePtrLock();
297
298 #if defined(DEBUG)
299 zeroMutListScavStats(&mutlist_scav_stats);
300 #endif
301
302 // attribute any costs to CCS_GC
303 #if defined(PROFILING)
304 for (n = 0; n < n_capabilities; n++) {
305 save_CCS[n] = capabilities[n]->r.rCCCS;
306 capabilities[n]->r.rCCCS = CCS_GC;
307 }
308 #endif
309
310 /* Figure out which generation to collect
311 */
312 N = collect_gen;
313 major_gc = (N == RtsFlags.GcFlags.generations-1);
314
315 /* See Note [Deadlock detection under nonmoving collector]. */
316 deadlock_detect_gc = deadlock_detect;
317
318 #if defined(THREADED_RTS)
319 if (major_gc && RtsFlags.GcFlags.useNonmoving && concurrent_coll_running) {
320 /* If there is already a concurrent major collection running then
321 * there is no benefit to starting another.
322 * TODO: Catch heap-size runaway.
323 */
324 N--;
325 collect_gen--;
326 major_gc = false;
327 }
328 #endif
329
330 /* N.B. The nonmoving collector works a bit differently. See
331 * Note [Static objects under the nonmoving collector].
332 */
333 if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
334 prev_static_flag = static_flag;
335 static_flag =
336 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
337 }
338
339 if (major_gc) {
340 unload_mark_needed = prepareUnloadCheck();
341 } else {
342 unload_mark_needed = false;
343 }
344
345 #if defined(THREADED_RTS)
346 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
347 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
348 // It's not always a good idea to do load balancing in parallel
349 // GC. In particular, for a parallel program we don't want to
350 // lose locality by moving cached data into another CPU's cache
351 // (this effect can be quite significant).
352 //
353 // We could have a more complex way to determine whether to do
354 // work stealing or not, e.g. it might be a good idea to do it
355 // if the heap is big. For now, we just turn it on or off with
356 // a flag.
357 #endif
358
359 /* Start threads, so they can be spinning up while we finish initialisation.
360 */
361 start_gc_threads();
362
363 #if defined(THREADED_RTS)
364 /* How many threads will be participating in this GC?
365 * We don't try to parallelise minor GCs (unless the user asks for
366 * it with +RTS -gn0), or mark/compact/sweep GC.
367 */
368 if (gc_type == SYNC_GC_PAR) {
369 n_gc_threads = n_capabilities;
370 } else {
371 n_gc_threads = 1;
372 }
373 #else
374 n_gc_threads = 1;
375 #endif
376
377 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
378 N, n_gc_threads);
379
380 #if defined(DEBUG)
381 // check for memory leaks if DEBUG is on
382 memInventory(DEBUG_gc);
383 #endif
384
385 // do this *before* we start scavenging
386 collectFreshWeakPtrs();
387
388 // check sanity *before* GC
389 IF_DEBUG(sanity, checkSanity(false /* before GC */, major_gc));
390
391 // gather blocks allocated using allocatePinned() from each capability
392 // and put them on the g0->large_objects list.
393 collect_pinned_object_blocks();
394
395 // Initialise all the generations that we're collecting.
396 for (g = 0; g <= N; g++) {
397 prepare_collected_gen(&generations[g]);
398 }
399 // Initialise all the generations that we're *not* collecting.
400 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
401 prepare_uncollected_gen(&generations[g]);
402 }
403
404 // Prepare this gc_thread
405 init_gc_thread(gct);
406
407 /* Allocate a mark stack if we're doing a major collection.
408 */
409 if (major_gc && oldest_gen->mark) {
410 mark_stack_bd = allocBlock();
411 mark_stack_top_bd = mark_stack_bd;
412 mark_stack_bd->link = NULL;
413 mark_stack_bd->u.back = NULL;
414 mark_sp = mark_stack_bd->start;
415 } else {
416 mark_stack_bd = NULL;
417 mark_stack_top_bd = NULL;
418 mark_sp = NULL;
419 }
420
421 /* -----------------------------------------------------------------------
422 * follow all the roots that we know about:
423 */
424
425 // the main thread is running: this prevents any other threads from
426 // exiting prematurely, so we can start them now.
427 // NB. do this after the mutable lists have been saved above, otherwise
428 // the other GC threads will be writing into the old mutable lists.
429 inc_running();
430 wakeup_gc_threads(gct->thread_index, idle_cap);
431
432 traceEventGcWork(gct->cap);
433
434 // scavenge the capability-private mutable lists. This isn't part
435 // of markSomeCapabilities() because markSomeCapabilities() can only
436 // call back into the GC via mark_root() (due to the gct register
437 // variable).
438 if (n_gc_threads == 1) {
439 for (n = 0; n < n_capabilities; n++) {
440 #if defined(THREADED_RTS)
441 scavenge_capability_mut_Lists1(capabilities[n]);
442 #else
443 scavenge_capability_mut_lists(capabilities[n]);
444 #endif
445 }
446 } else {
447 scavenge_capability_mut_lists(gct->cap);
448 for (n = 0; n < n_capabilities; n++) {
449 if (idle_cap[n]) {
450 markCapability(mark_root, gct, capabilities[n],
451 true/*don't mark sparks*/);
452 scavenge_capability_mut_lists(capabilities[n]);
453 }
454 }
455 }
456
457 // follow roots from the CAF list (used by GHCi)
458 gct->evac_gen_no = 0;
459 markCAFs(mark_root, gct);
460
461 // follow all the roots that the application knows about.
462 gct->evac_gen_no = 0;
463 if (n_gc_threads == 1) {
464 for (n = 0; n < n_capabilities; n++) {
465 markCapability(mark_root, gct, capabilities[n],
466 true/*don't mark sparks*/);
467 }
468 } else {
469 markCapability(mark_root, gct, cap, true/*don't mark sparks*/);
470 }
471
472 markScheduler(mark_root, gct);
473
474 // Mark the weak pointer list, and prepare to detect dead weak pointers.
475 markWeakPtrList();
476 initWeakForGC();
477
478 // Mark the stable pointer table.
479 markStablePtrTable(mark_root, gct);
480
481 // Remember old stable name addresses.
482 rememberOldStableNameAddresses ();
483
484 /* -------------------------------------------------------------------------
485 * Repeatedly scavenge all the areas we know about until there's no
486 * more scavenging to be done.
487 */
488
489 StgWeak *dead_weak_ptr_list = NULL;
490 StgTSO *resurrected_threads = END_TSO_QUEUE;
491
492 for (;;)
493 {
494 scavenge_until_all_done();
495
496 // The other threads are now stopped. We might recurse back to
497 // here, but from now on this is the only thread.
498
499 // must be last... invariant is that everything is fully
500 // scavenged at this point.
501 if (traverseWeakPtrList(&dead_weak_ptr_list, &resurrected_threads)) { // returns true if evaced something
502 inc_running();
503 continue;
504 }
505
506 // If we get to here, there's really nothing left to do.
507 break;
508 }
509
510 shutdown_gc_threads(gct->thread_index, idle_cap);
511
512 // Now see which stable names are still alive.
513 gcStableNameTable();
514
515 #if defined(THREADED_RTS)
516 if (n_gc_threads == 1) {
517 for (n = 0; n < n_capabilities; n++) {
518 pruneSparkQueue(false, capabilities[n]);
519 }
520 } else {
521 for (n = 0; n < n_capabilities; n++) {
522 if (n == cap->no || idle_cap[n]) {
523 pruneSparkQueue(false, capabilities[n]);
524 }
525 }
526 }
527 #endif
528
529 #if defined(PROFILING)
530 // We call processHeapClosureForDead() on every closure destroyed during
531 // the current garbage collection, so we invoke LdvCensusForDead().
532 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
533 || RtsFlags.ProfFlags.bioSelector != NULL) {
534 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
535 LdvCensusForDead(N);
536 ACQUIRE_SM_LOCK;
537 }
538 #endif
539
540 // NO MORE EVACUATION AFTER THIS POINT!
541
542 // Finally: compact or sweep the oldest generation.
543 if (major_gc && oldest_gen->mark) {
544 if (oldest_gen->compact)
545 compact(gct->scavenged_static_objects,
546 &dead_weak_ptr_list,
547 &resurrected_threads);
548 else
549 sweep(oldest_gen);
550 }
551
552 copied = 0;
553 par_max_copied = 0;
554 par_balanced_copied = 0;
555 gc_spin_spin = 0;
556 gc_spin_yield = 0;
557 mut_spin_spin = 0;
558 mut_spin_yield = 0;
559 any_work = 0;
560 no_work = 0;
561 scav_find_work = 0;
562 {
563 uint32_t i;
564 uint64_t par_balanced_copied_acc = 0;
565 const gc_thread* thread;
566
567 // see Note [n_gc_threads]
568 if (n_gc_threads > 1) {
569 for (i=0; i < n_gc_threads; i++) {
570 copied += RELAXED_LOAD(&gc_threads[i]->copied);
571 }
572 for (i=0; i < n_gc_threads; i++) {
573 thread = gc_threads[i];
574 debugTrace(DEBUG_gc,"thread %d:", i);
575 debugTrace(DEBUG_gc," copied %ld",
576 RELAXED_LOAD(&thread->copied) * sizeof(W_));
577 debugTrace(DEBUG_gc," scanned %ld",
578 RELAXED_LOAD(&thread->scanned) * sizeof(W_));
579 debugTrace(DEBUG_gc," any_work %ld",
580 RELAXED_LOAD(&thread->any_work));
581 debugTrace(DEBUG_gc," no_work %ld",
582 RELAXED_LOAD(&thread->no_work));
583 debugTrace(DEBUG_gc," scav_find_work %ld",
584 RELAXED_LOAD(&thread->scav_find_work));
585
586 #if defined(THREADED_RTS) && defined(PROF_SPIN)
587 gc_spin_spin += RELAXED_LOAD(&thread->gc_spin.spin);
588 gc_spin_yield += RELAXED_LOAD(&thread->gc_spin.yield);
589 mut_spin_spin += RELAXED_LOAD(&thread->mut_spin.spin);
590 mut_spin_yield += RELAXED_LOAD(&thread->mut_spin.yield);
591 #endif
592
593 any_work += RELAXED_LOAD(&thread->any_work);
594 no_work += RELAXED_LOAD(&thread->no_work);
595 scav_find_work += RELAXED_LOAD(&thread->scav_find_work);
596
597 par_max_copied = stg_max(RELAXED_LOAD(&thread->copied), par_max_copied);
598 par_balanced_copied_acc +=
599 stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);
600 }
601 // See Note [Work Balance] for an explanation of this computation
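// Informally: this is 0 when a single thread did all of the copying and
// approaches `copied` when the work was spread evenly across threads; the
// "+ (n_gc_threads - 1) / 2" term rounds the division to the nearest
// integer.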
602 par_balanced_copied =
603 (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
604 (n_gc_threads - 1);
605 } else {
606 copied += gct->copied;
607 }
608 }
609
610 // Run through all the generations and tidy up.
611 // We're going to:
612 // - count the amount of "live" data (live_words, live_blocks)
613 // - count the amount of "copied" data in this GC (copied)
614 // - free from-space
615 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
616 //
617 live_words = 0;
618 live_blocks = 0;
619
620 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
621
622 if (g == N) {
623 generations[g].collections++; // for stats
624 if (n_gc_threads > 1) generations[g].par_collections++;
625 }
626
627 // Count the mutable list as bytes "copied" for the purposes of
628 // stats. Every mutable list is copied during every GC.
629 if (g > 0) {
630 W_ mut_list_size = 0;
631 for (n = 0; n < n_capabilities; n++) {
632 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
633 }
634 copied += mut_list_size;
635
636 debugTrace(DEBUG_gc,
637 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d others)",
638 (unsigned long)(mut_list_size * sizeof(W_)),
639 mutlist_scav_stats.n_MUTVAR,
640 mutlist_scav_stats.n_MUTARR,
641 mutlist_scav_stats.n_MVAR,
642 mutlist_scav_stats.n_TVAR,
643 mutlist_scav_stats.n_TVAR_WATCH_QUEUE,
644 mutlist_scav_stats.n_TREC_CHUNK,
645 mutlist_scav_stats.n_TREC_HEADER,
646 mutlist_scav_stats.n_OTHERS);
647 }
648
649 bdescr *next, *prev;
650 gen = &generations[g];
651
652 // for generations we collected...
653 if (g <= N && !(RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
654
655 /* free old memory and shift to-space into from-space for all
656 * the collected generations (except the allocation area). These
657 * freed blocks will probably be quickly recycled.
658 */
659 if (gen->mark)
660 {
661 // tack the new blocks on the end of the existing blocks
662 if (gen->old_blocks != NULL) {
663
664 prev = NULL;
665 for (bd = gen->old_blocks; bd != NULL; bd = next) {
666
667 next = bd->link;
668
669 if (!(bd->flags & BF_MARKED))
670 {
671 if (prev == NULL) {
672 gen->old_blocks = next;
673 } else {
674 prev->link = next;
675 }
676 freeGroup(bd);
677 gen->n_old_blocks--;
678 }
679 else
680 {
681 gen->n_words += bd->free - bd->start;
682
683 // NB. this step might not be compacted next
684 // time, so reset the BF_MARKED flags.
685 // They are set before GC if we're going to
686 // compact. (search for BF_MARKED above).
687 bd->flags &= ~BF_MARKED;
688
689 // between GCs, all blocks in the heap except
690 // for the nursery have the BF_EVACUATED flag set.
691 bd->flags |= BF_EVACUATED;
692
693 prev = bd;
694 }
695 }
696
697 if (prev != NULL) {
698 prev->link = gen->blocks;
699 gen->blocks = gen->old_blocks;
700 }
701 }
702 // add the new blocks to the block tally
703 gen->n_blocks += gen->n_old_blocks;
704 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
705 ASSERT(countOccupied(gen->blocks) == gen->n_words);
706 }
707 else // not compacted
708 {
709 freeChain(gen->old_blocks);
710 }
711
712 gen->old_blocks = NULL;
713 gen->n_old_blocks = 0;
714
715 /* LARGE OBJECTS. The current live large objects are chained on
716 * scavenged_large, having been moved during garbage
717 * collection from large_objects. Any objects left on the
718 * large_objects list are therefore dead, so we free them here.
719 */
720 freeChain(gen->large_objects);
721 gen->large_objects = gen->scavenged_large_objects;
722 gen->n_large_blocks = gen->n_scavenged_large_blocks;
723 gen->n_large_words = countOccupied(gen->large_objects);
724 gen->n_new_large_words = 0;
725
726 /* COMPACT_NFDATA. The currently live compacts are chained
727 * to live_compact_objects, quite like large objects. And
728 * objects left on the compact_objects list are dead.
729 *
730 * We don't run a simple freeChain because we want to give the
731 * CNF module some chance to free memory that freeChain would
732 * not see (namely blocks appended to a CNF through a compactResize).
733 *
734 * See Note [Compact Normal Forms] for details.
735 */
736 for (bd = gen->compact_objects; bd; bd = next) {
737 next = bd->link;
738 compactFree(((StgCompactNFDataBlock*)bd->start)->owner);
739 }
740 gen->compact_objects = gen->live_compact_objects;
741 gen->n_compact_blocks = gen->n_live_compact_blocks;
742 }
743 else // for generations > N
744 {
745 /* For older generations, we need to append the
746 * scavenged_large_object list (i.e. large objects that have been
747 * promoted during this GC) to the large_object list for that step.
748 */
749 for (bd = gen->scavenged_large_objects; bd; bd = next) {
750 next = bd->link;
751 dbl_link_onto(bd, &gen->large_objects);
752 gen->n_large_words += bd->free - bd->start;
753 }
754
755 // And same for compacts
756 for (bd = gen->live_compact_objects; bd; bd = next) {
757 next = bd->link;
758 dbl_link_onto(bd, &gen->compact_objects);
759 }
760
761 // add the new blocks we promoted during this GC
762 gen->n_large_blocks += gen->n_scavenged_large_blocks;
763 gen->n_compact_blocks += gen->n_live_compact_blocks;
764 }
765
766 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
767 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
768 // We can't run the same assertions on compact objects because there
769 // is memory "the GC doesn't see" (directly), but which is still
770 // accounted in gen->n_compact_blocks.
771
772 gen->scavenged_large_objects = NULL;
773 gen->n_scavenged_large_blocks = 0;
774 gen->live_compact_objects = NULL;
775 gen->n_live_compact_blocks = 0;
776
777 // Count "live" data
778 live_words += genLiveWords(gen);
779 live_blocks += genLiveBlocks(gen);
780
781 // add in the partial blocks in the gen_workspaces
782 {
783 uint32_t i;
784 for (i = 0; i < n_capabilities; i++) {
785 live_words += gcThreadLiveWords(i, gen->no);
786 live_blocks += gcThreadLiveBlocks(i, gen->no);
787 }
788 }
789 } // for all generations
790
791 // Flush the update remembered sets. See Note [Eager update remembered set
792 // flushing] in NonMovingMark.c
793 if (RtsFlags.GcFlags.useNonmoving) {
794 RELEASE_SM_LOCK;
795 for (n = 0; n < n_capabilities; n++) {
796 nonmovingAddUpdRemSetBlocks(&capabilities[n]->upd_rem_set.queue);
797 }
798 ACQUIRE_SM_LOCK;
799 }
800
801 // Mark and sweep the oldest generation.
802 // N.B. This can only happen after we've moved
803 // oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
804 ASSERT(oldest_gen->scavenged_large_objects == NULL);
805 if (RtsFlags.GcFlags.useNonmoving && major_gc) {
806 // All threads in the non-moving heap should be found to be alive, because
807 // threads in the non-moving generation's list should live in the
808 // non-moving heap, and we consider non-moving objects alive during
809 // preparation.
810 ASSERT(oldest_gen->old_threads == END_TSO_QUEUE);
811 // For weaks, remember that we evacuated all weaks to the non-moving heap
812 // in markWeakPtrList(), and then moved the weak_ptr_list list to
813 // old_weak_ptr_list. We then moved weaks with live keys to the
814 // weak_ptr_list again. Then, in collectDeadWeakPtrs() we moved weaks in
815 // old_weak_ptr_list to dead_weak_ptr_list. So at this point
816 // old_weak_ptr_list should be empty.
817 ASSERT(oldest_gen->old_weak_ptr_list == NULL);
818
819 // we may need to take the lock to allocate mark queue blocks
820 RELEASE_SM_LOCK;
821 // dead_weak_ptr_list contains weak pointers with dead keys. Those need to
822 // be kept alive because we'll use them in scheduleFinalizers(). Similarly
823 // resurrected_threads are also going to be used in resurrectThreads(),
824 // so we need to mark those too.
825 // Note that in sequential case these lists will be appended with more
826 // weaks and threads found to be dead in mark.
827 #if !defined(THREADED_RTS)
828 // In the non-threaded runtime this is the only time we push to the
829 // upd_rem_set
830 nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
831 #endif
832 nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
833 ACQUIRE_SM_LOCK;
834 }
835
836 // Update the max size of older generations after a major GC:
837 // We can't resize here in the case of the concurrent collector since we
838 // don't yet know how much live data we have. This will be instead done
839 // once we finish marking.
840 if (major_gc && RtsFlags.GcFlags.generations > 1 && ! RtsFlags.GcFlags.useNonmoving)
841 resizeGenerations();
842
843 // Free the mark stack.
844 if (mark_stack_top_bd != NULL) {
845 debugTrace(DEBUG_gc, "mark stack: %d blocks",
846 countBlocks(mark_stack_top_bd));
847 freeChain(mark_stack_top_bd);
848 }
849
850 // Free any bitmaps.
851 for (g = 0; g <= N; g++) {
852 gen = &generations[g];
853 if (gen->bitmap != NULL) {
854 freeGroup(gen->bitmap);
855 gen->bitmap = NULL;
856 }
857 }
858
859 resize_nursery();
860
861 resetNurseries();
862
863 #if defined(DEBUG)
864 // Mark the garbage collected CAFs as dead. Done in `nonmovingGcCafs()` when
865 // non-moving GC is enabled.
866 if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
867 gcCAFs();
868 }
869 #endif
870
871 // Update the stable name hash table
872 updateStableNameTable(major_gc);
873
874 // unlock the StablePtr table. Must be before scheduleFinalizers(),
875 // because a finalizer may call hs_free_fun_ptr() or
876 // hs_free_stable_ptr(), both of which access the StablePtr table.
877 stablePtrUnlock();
878
879 // Unload dynamically-loaded object code after a major GC.
880 // See Note [Object unloading] in CheckUnload.c for details.
881 //
882 // TODO: Similar to `nonmovingGcCafs` non-moving GC should have its own
883 // collector for these objects, but that's currently not implemented, so we
884 // simply don't unload object code when non-moving GC is enabled.
885 if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
886 checkUnload();
887 }
888
889 #if defined(PROFILING)
890 // resetStaticObjectForProfiling() must be called before
891 // zeroing below.
892
893 // ToDo: fix the gct->scavenged_static_objects below
894 resetStaticObjectForProfiling(gct->scavenged_static_objects);
895 #endif
896
897 // Start any pending finalizers. Must be after
898 // updateStableTables() and stableUnlock() (see #4221).
899 RELEASE_SM_LOCK;
900 scheduleFinalizers(cap, dead_weak_ptr_list);
901 ACQUIRE_SM_LOCK;
902
903 // check sanity after GC
904 // before resurrectThreads(), because that might overwrite some
905 // closures, which will cause problems with THREADED where we don't
906 // fill slop. If we are using the nonmoving collector then we can't claim to
907 // be *after* the major GC; it's now running concurrently.
908 IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc && !RtsFlags.GcFlags.useNonmoving));
909
910 // If a heap census is due, we need to do it before
911 // resurrectThreads(), for the same reason as checkSanity above:
912 // resurrectThreads() will overwrite some closures and leave slop
913 // behind.
914 if (do_heap_census) {
915 debugTrace(DEBUG_sched, "performing heap census");
916 RELEASE_SM_LOCK;
917 heapCensus(mut_time);
918 ACQUIRE_SM_LOCK;
919 }
920
921 // send exceptions to any threads which were about to die
922 RELEASE_SM_LOCK;
923 resurrectThreads(resurrected_threads);
924 ACQUIRE_SM_LOCK;
925
926 if (major_gc) {
927 W_ need_prealloc, need_live, need, got;
928 uint32_t i;
929
930 need_live = 0;
931 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
932 need_live += genLiveBlocks(&generations[i]);
933 }
934 need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
935
936 need_prealloc = 0;
937 for (i = 0; i < n_nurseries; i++) {
938 need_prealloc += nurseries[i].n_blocks;
939 }
940 need_prealloc += RtsFlags.GcFlags.largeAllocLim;
941 need_prealloc += countAllocdBlocks(exec_block);
942 need_prealloc += arenaBlocks();
943 #if defined(PROFILING)
944 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
945 need_prealloc = retainerStackBlocks();
946 }
947 #endif
948
949 /* If the amount of data remains constant, next major GC we'll
950 * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
951 * in order to reduce repeated deallocation and reallocation. #14702
952 */
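/* For example, with the default old-generation factor (+RTS -F2) and 1000
 * blocks of live data, we keep roughly need_prealloc + 4000 blocks before
 * returning the remainder to the OS.
 */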
953 need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
954
955 /* Also, if the user has set a heap size suggestion, do not drop below it.
956 */
957 need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
958
959 /* But with a large nursery, the above estimate might exceed
960 * maxHeapSize. A large resident set size might make the OS
961 * kill this process, or swap unnecessarily. Therefore we
962 * ensure that our estimate does not exceed maxHeapSize.
963 */
964 if (RtsFlags.GcFlags.maxHeapSize != 0) {
965 need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
966 }
967
968 need = BLOCKS_TO_MBLOCKS(need);
969
970 got = mblocks_allocated;
971
972 if (got > need) {
973 returnMemoryToOS(got - need);
974 }
975 }
976
977 // extra GC trace info
978 IF_DEBUG(gc, statDescribeGens());
979
980 #if defined(DEBUG)
981 // symbol-table based profiling
982 /* heapCensus(to_blocks); */ /* ToDo */
983 #endif
984
985 // restore enclosing cost centre
986 #if defined(PROFILING)
987 for (n = 0; n < n_capabilities; n++) {
988 capabilities[n]->r.rCCCS = save_CCS[n];
989 }
990 #endif
991
992 #if defined(DEBUG)
993 // check for memory leaks if DEBUG is on
994 memInventory(DEBUG_gc);
995 #endif
996
997 // ok, GC over: tell the stats department what happened.
998 stat_endGCWorker(cap, gct);
999 stat_endGC(cap, gct, live_words, copied,
1000 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
1001 N, n_gc_threads, gc_threads,
1002 par_max_copied, par_balanced_copied,
1003 gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
1004 any_work, no_work, scav_find_work);
1005
1006 #if defined(RTS_USER_SIGNALS)
1007 if (RtsFlags.MiscFlags.install_signal_handlers) {
1008 // unblock signals again
1009 unblockUserSignals();
1010 }
1011 #endif
1012
1013 RELEASE_SM_LOCK;
1014
1015 SET_GCT(saved_gct);
1016 }
1017
1018 /* -----------------------------------------------------------------------------
1019 Heap overflow is indicated by setting a flag that the caller of
1020 GarbageCollect can check. (not ideal, TODO: better)
1021 -------------------------------------------------------------------------- */
1022
1023 static void heapOverflow(void)
1024 {
1025 heap_overflow = true;
1026 }
1027
1028 /* -----------------------------------------------------------------------------
1029 Initialise the gc_thread structures.
1030 -------------------------------------------------------------------------- */
1031
1032 static void
1033 new_gc_thread (uint32_t n, gc_thread *t)
1034 {
1035 uint32_t g;
1036 gen_workspace *ws;
1037
1038 t->cap = capabilities[n];
1039
1040 #if defined(THREADED_RTS)
1041 t->id = 0;
1042 initSpinLock(&t->gc_spin);
1043 initSpinLock(&t->mut_spin);
1044 ACQUIRE_SPIN_LOCK(&t->gc_spin);
1045 ACQUIRE_SPIN_LOCK(&t->mut_spin);
1046 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
1047 // thread to start up, see wakeup_gc_threads
1048 #endif
1049
1050 t->thread_index = n;
1051 t->free_blocks = NULL;
1052 t->gc_count = 0;
1053
1054 init_gc_thread(t);
1055
1056 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
1057 {
1058 ws = &t->gens[g];
1059 ws->gen = &generations[g];
1060 ASSERT(g == ws->gen->no);
1061 ws->my_gct = t;
1062
1063 // We want to call
1064 // alloc_todo_block(ws,0);
1065 // but can't, because it uses gct which isn't set up at this point.
1066 // Hence, allocate a block for todo_bd manually:
1067 {
1068 bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
1069 // no lock, locks aren't initialised yet
1070 initBdescr(bd, ws->gen, ws->gen->to);
1071 bd->flags = BF_EVACUATED;
1072 bd->u.scan = bd->free = bd->start;
1073
1074 ws->todo_bd = bd;
1075 ws->todo_free = bd->free;
1076 ws->todo_lim = bd->start + BLOCK_SIZE_W;
1077 }
1078
1079 ws->todo_q = newWSDeque(128);
1080 ws->todo_overflow = NULL;
1081 ws->n_todo_overflow = 0;
1082 ws->todo_large_objects = NULL;
1083 ws->todo_seg = END_NONMOVING_TODO_LIST;
1084
1085 ws->part_list = NULL;
1086 ws->n_part_blocks = 0;
1087 ws->n_part_words = 0;
1088
1089 ws->scavd_list = NULL;
1090 ws->n_scavd_blocks = 0;
1091 ws->n_scavd_words = 0;
1092 }
1093 }
1094
1095
1096 void
1097 initGcThreads (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
1098 {
1099 #if defined(THREADED_RTS)
1100 uint32_t i;
1101
1102 if (from > 0) {
1103 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
1104 "initGcThreads");
1105 } else {
1106 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
1107 "initGcThreads");
1108 }
1109
1110 for (i = from; i < to; i++) {
1111 gc_threads[i] =
1112 stgMallocBytes(sizeof(gc_thread) +
1113 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
1114 "alloc_gc_threads");
1115
1116 new_gc_thread(i, gc_threads[i]);
1117 }
1118 #else
1119 ASSERT(from == 0 && to == 1);
1120 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
1121 gc_threads[0] = gct;
1122 new_gc_thread(0,gc_threads[0]);
1123 #endif
1124 }
1125
1126 void
1127 freeGcThreads (void)
1128 {
1129 uint32_t g;
1130 if (gc_threads != NULL) {
1131 #if defined(THREADED_RTS)
1132 uint32_t i;
1133 for (i = 0; i < n_capabilities; i++) {
1134 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
1135 {
1136 freeWSDeque(gc_threads[i]->gens[g].todo_q);
1137 }
1138 stgFree (gc_threads[i]);
1139 }
1140 stgFree (gc_threads);
1141 #else
1142 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
1143 {
1144 freeWSDeque(gc_threads[0]->gens[g].todo_q);
1145 }
1146 stgFree (gc_threads);
1147 #endif
1148 gc_threads = NULL;
1149 }
1150 }
1151
1152 /* ----------------------------------------------------------------------------
1153 Start GC threads
1154 ------------------------------------------------------------------------- */
1155
1156 static volatile StgWord gc_running_threads;
1157
1158 static StgWord
1159 inc_running (void)
1160 {
1161 StgWord new;
1162 new = atomic_inc(&gc_running_threads, 1);
1163 ASSERT(new <= n_gc_threads);
1164 return new;
1165 }
1166
1167 static StgWord
1168 dec_running (void)
1169 {
1170 ASSERT(RELAXED_LOAD(&gc_running_threads) != 0);
1171 return atomic_dec(&gc_running_threads);
1172 }
1173
1174 static bool
1175 any_work (void)
1176 {
1177 int g;
1178 gen_workspace *ws;
1179
1180 NONATOMIC_ADD(&gct->any_work, 1);
1181
1182 write_barrier();
1183
1184 // scavenge objects in compacted generation
1185 if (mark_stack_bd != NULL && !mark_stack_empty()) {
1186 return true;
1187 }
1188
1189 // Check for global work in any gen. We don't need to check for
1190 // local work, because we have already exited scavenge_loop(),
1191 // which means there is no local work for this thread.
1192 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
1193 ws = &gct->gens[g];
1194 if (ws->todo_large_objects) return true;
1195 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1196 if (ws->todo_overflow) return true;
1197 }
1198
1199 #if defined(THREADED_RTS)
1200 if (work_stealing) {
1201 uint32_t n;
1202 // look for work to steal
1203 for (n = 0; n < n_gc_threads; n++) {
1204 if (n == gct->thread_index) continue;
1205 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
1206 ws = &gc_threads[n]->gens[g];
1207 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1208 }
1209 }
1210 }
1211 #endif
1212
1213 __atomic_fetch_add(&gct->no_work, 1, __ATOMIC_RELAXED);
1214 #if defined(THREADED_RTS)
1215 yieldThread();
1216 #endif
1217
1218 return false;
1219 }
1220
1221 static void
1222 scavenge_until_all_done (void)
1223 {
1224 DEBUG_ONLY( uint32_t r );
1225
1226
1227 loop:
1228 #if defined(THREADED_RTS)
1229 if (n_gc_threads > 1) {
1230 scavenge_loop();
1231 } else {
1232 scavenge_loop1();
1233 }
1234 #else
1235 scavenge_loop();
1236 #endif
1237
1238 collect_gct_blocks();
1239
1240 // scavenge_loop() only exits when there's no work to do
1241
1242 // This atomic decrement also serves as a full barrier to ensure that any
1243 // writes we made during scavenging are visible to other threads.
1244 #if defined(DEBUG)
1245 r = dec_running();
1246 #else
1247 dec_running();
1248 #endif
1249
1250 traceEventGcIdle(gct->cap);
1251
1252 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1253
1254 while (SEQ_CST_LOAD(&gc_running_threads) != 0) {
1255 // usleep(1);
1256 if (any_work()) {
1257 inc_running();
1258 traceEventGcWork(gct->cap);
1259 goto loop;
1260 }
1261 // any_work() does not remove the work from the queue, it
1262 // just checks for the presence of work. If we find any,
1263 // then we increment gc_running_threads and go back to
1264 // scavenge_loop() to perform any pending work.
1265 }
1266
1267 traceEventGcDone(gct->cap);
1268 }
1269
1270 #if defined(THREADED_RTS)
1271
1272 void
1273 gcWorkerThread (Capability *cap)
1274 {
1275 gc_thread *saved_gct;
1276
1277 // necessary if we stole a callee-saves register for gct:
1278 saved_gct = gct;
1279
1280 SET_GCT(gc_threads[cap->no]);
1281 gct->id = osThreadId();
1282 stat_startGCWorker (cap, gct);
1283
1284 // Wait until we're told to wake up
1285 RELEASE_SPIN_LOCK(&gct->mut_spin);
1286 // yieldThread();
1287 // Strangely, adding a yieldThread() here makes the CPU time
1288 // measurements more accurate on Linux, perhaps because it syncs
1289 // the CPU time across the multiple cores. Without this, CPU time
1290 // is heavily skewed towards GC rather than MUT.
1291 SEQ_CST_STORE(&gct->wakeup, GC_THREAD_STANDING_BY);
1292 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1293 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1294
1295 init_gc_thread(gct);
1296
1297 traceEventGcWork(gct->cap);
1298
1299 // Every thread evacuates some roots.
1300 gct->evac_gen_no = 0;
1301 markCapability(mark_root, gct, cap, true/*prune sparks*/);
1302 scavenge_capability_mut_lists(cap);
1303
1304 scavenge_until_all_done();
1305
1306 #if defined(THREADED_RTS)
1307 // Now that the whole heap is marked, we discard any sparks that
1308 // were found to be unreachable. The main GC thread is currently
1309 // marking heap reachable via weak pointers, so it is
1310 // non-deterministic whether a spark will be retained if it is
1311 // only reachable via weak pointers. To fix this problem would
1312 // require another GC barrier, which is too high a price.
1313 pruneSparkQueue(false, cap);
1314 #endif
1315
1316 // Wait until we're told to continue
1317 RELEASE_SPIN_LOCK(&gct->gc_spin);
1318 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1319 gct->thread_index);
1320 stat_endGCWorker (cap, gct);
1321 // This must come *after* stat_endGCWorker since it serves to
1322 // synchronize us with the GC leader, which will later aggregate the
1323 // GC statistics.
1324 SEQ_CST_STORE(&gct->wakeup, GC_THREAD_WAITING_TO_CONTINUE);
1325 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1326 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1327
1328 SET_GCT(saved_gct);
1329 }
1330
1331 #endif
1332
1333 #if defined(THREADED_RTS)
1334
1335 void
1336 waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1337 {
1338 const uint32_t n_threads = n_capabilities;
1339 const uint32_t me = cap->no;
1340 uint32_t i, j;
1341 bool retry = true;
1342 Time t0, t1, t2;
1343
1344 t0 = t1 = t2 = getProcessElapsedTime();
1345
1346 while(retry) {
1347 for (i=0; i < n_threads; i++) {
1348 if (i == me || idle_cap[i]) continue;
1349 if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
1350 prodCapability(capabilities[i], cap->running_task);
1351 }
1352 }
1353 for (j=0; j < 10; j++) {
1354 retry = false;
1355 for (i=0; i < n_threads; i++) {
1356 if (i == me || idle_cap[i]) continue;
1357 write_barrier();
1358 interruptCapability(capabilities[i]);
1359 if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
1360 retry = true;
1361 }
1362 }
1363 if (!retry) break;
1364 #if defined(PROF_SPIN)
1365 waitForGcThreads_yield++;
1366 #endif
1367 yieldThread();
1368 }
1369
1370 t2 = getProcessElapsedTime();
1371 if (RtsFlags.GcFlags.longGCSync != 0 &&
1372 t2 - t1 > RtsFlags.GcFlags.longGCSync) {
1373 /* call this every longGCSync of delay */
1374 rtsConfig.longGCSync(cap->no, t2 - t0);
1375 t1 = t2;
1376 }
1377 if (retry) {
1378 #if defined(PROF_SPIN)
1379 // This is a bit strange, we'll get more yields than spins.
1380 // I guess that means it's not a spin-lock at all, but these
1381 // numbers are still useful (I think).
1382 waitForGcThreads_spin++;
1383 #endif
1384 }
1385 }
1386
1387 if (RtsFlags.GcFlags.longGCSync != 0 &&
1388 t2 - t0 > RtsFlags.GcFlags.longGCSync) {
1389 rtsConfig.longGCSyncEnd(t2 - t0);
1390 }
1391 }
1392
1393 #endif // THREADED_RTS
1394
1395 static void
1396 start_gc_threads (void)
1397 {
1398 #if defined(THREADED_RTS)
1399 gc_running_threads = 0;
1400 #endif
1401 }
1402
1403 static void
1404 wakeup_gc_threads (uint32_t me USED_IF_THREADS,
1405 bool idle_cap[] USED_IF_THREADS)
1406 {
1407 #if defined(THREADED_RTS)
1408 uint32_t i;
1409
1410 if (n_gc_threads == 1) return;
1411
1412 for (i=0; i < n_gc_threads; i++) {
1413 if (i == me || idle_cap[i]) continue;
1414 inc_running();
1415 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1416 if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY)
1417 barf("wakeup_gc_threads");
1418
1419 SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_RUNNING);
1420 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1421 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1422 }
1423 #endif
1424 }
1425
1426 // After GC is complete, we must wait for all GC threads to enter the
1427 // standby state, otherwise they may still be executing inside
1428 // any_work(), and may even remain awake until the next GC starts.
1429 static void
1430 shutdown_gc_threads (uint32_t me USED_IF_THREADS,
1431 bool idle_cap[] USED_IF_THREADS)
1432 {
1433 #if defined(THREADED_RTS)
1434 uint32_t i;
1435
1436 if (n_gc_threads == 1) return;
1437
1438 for (i=0; i < n_gc_threads; i++) {
1439 if (i == me || idle_cap[i]) continue;
1440 while (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE) {
1441 busy_wait_nop();
1442 }
1443 }
1444 #endif
1445 }
1446
1447 #if defined(THREADED_RTS)
1448 void
1449 releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1450 {
1451 const uint32_t n_threads = n_capabilities;
1452 const uint32_t me = cap->no;
1453 uint32_t i;
1454 for (i=0; i < n_threads; i++) {
1455 if (i == me || idle_cap[i]) continue;
1456 if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE)
1457 barf("releaseGCThreads");
1458
1459 SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_INACTIVE);
1460 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1461 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1462 }
1463 }
1464 #endif
1465
1466 /* ----------------------------------------------------------------------------
1467 Save the mutable lists in saved_mut_lists, where they will be scavenged
1468 during GC
1469 ------------------------------------------------------------------------- */
1470
1471 static void
1472 stash_mut_list (Capability *cap, uint32_t gen_no)
1473 {
1474 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1475 RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));
1476 }
1477
1478 /* ----------------------------------------------------------------------------
1479 Initialise a generation that is to be collected
1480 ------------------------------------------------------------------------- */
1481
1482 static void
1483 prepare_collected_gen (generation *gen)
1484 {
1485 uint32_t i, g, n;
1486 gen_workspace *ws;
1487 bdescr *bd, *next;
1488
1489 g = gen->no;
1490
1491 if (RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no) {
1492 // Nonmoving heap's mutable list is always a root.
1493 for (i = 0; i < n_capabilities; i++) {
1494 stash_mut_list(capabilities[i], g);
1495 }
1496 } else if (g != 0) {
1497 // Otherwise throw away the current mutable list. Invariant: the
1498 // mutable list always has at least one block; this means we can avoid
1499 // a check for NULL in recordMutable().
1500 for (i = 0; i < n_capabilities; i++) {
1501 bdescr *old = RELAXED_LOAD(&capabilities[i]->mut_lists[g]);
1502 freeChain(old);
1503
1504 bdescr *new = allocBlockOnNode(capNoToNumaNode(i));
1505 RELAXED_STORE(&capabilities[i]->mut_lists[g], new);
1506 }
1507 }
1508
1509 gen = &generations[g];
1510 ASSERT(gen->no == g);
1511
1512 // we'll construct a new list of threads in this step
1513 // during GC, throw away the current list.
1514 gen->old_threads = gen->threads;
1515 gen->threads = END_TSO_QUEUE;
1516
1517 // deprecate the existing blocks (except in the case of the nonmoving
1518 // collector since these will be preserved in nonmovingCollect for the
1519 // concurrent GC).
1520 if (!(RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no)) {
1521 gen->old_blocks = gen->blocks;
1522 gen->n_old_blocks = gen->n_blocks;
1523 gen->blocks = NULL;
1524 gen->n_blocks = 0;
1525 gen->n_words = 0;
1526 gen->live_estimate = 0;
1527 }
1528
1529 // initialise the large object queues.
1530 ASSERT(gen->scavenged_large_objects == NULL);
1531 ASSERT(gen->n_scavenged_large_blocks == 0);
1532 ASSERT(gen->live_compact_objects == NULL);
1533 ASSERT(gen->n_live_compact_blocks == 0);
1534
1535 // grab all the partial blocks stashed in the gc_thread workspaces and
1536 // move them to the old_blocks list of this gen.
1537 for (n = 0; n < n_capabilities; n++) {
1538 ws = &gc_threads[n]->gens[gen->no];
1539
1540 for (bd = ws->part_list; bd != NULL; bd = next) {
1541 next = bd->link;
1542 bd->link = gen->old_blocks;
1543 gen->old_blocks = bd;
1544 gen->n_old_blocks += bd->blocks;
1545 }
1546 ws->part_list = NULL;
1547 ws->n_part_blocks = 0;
1548 ws->n_part_words = 0;
1549
1550 ASSERT(ws->scavd_list == NULL);
1551 ASSERT(ws->n_scavd_blocks == 0);
1552 ASSERT(ws->n_scavd_words == 0);
1553
1554 if (ws->todo_free != ws->todo_bd->start) {
1555 ws->todo_bd->free = ws->todo_free;
1556 ws->todo_bd->link = gen->old_blocks;
1557 gen->old_blocks = ws->todo_bd;
1558 gen->n_old_blocks += ws->todo_bd->blocks;
1559 alloc_todo_block(ws,0); // always has one block.
1560 }
1561 }
1562
1563 // mark the small objects as from-space
1564 for (bd = gen->old_blocks; bd; bd = bd->link) {
1565 bd->flags &= ~BF_EVACUATED;
1566 }
1567
1568 // mark the large objects as from-space
1569 for (bd = gen->large_objects; bd; bd = bd->link) {
1570 bd->flags &= ~BF_EVACUATED;
1571 }
1572
1573 // mark the compact objects as from-space
1574 for (bd = gen->compact_objects; bd; bd = bd->link) {
1575 bd->flags &= ~BF_EVACUATED;
1576 }
1577
1578 // for a compacted generation, we need to allocate the bitmap
1579 if (gen->mark) {
1580 StgWord bitmap_size; // in bytes
1581 bdescr *bitmap_bdescr;
1582 StgWord *bitmap;
1583
1584 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
1585
1586 if (bitmap_size > 0) {
1587 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1588 / BLOCK_SIZE);
1589 gen->bitmap = bitmap_bdescr;
1590 bitmap = bitmap_bdescr->start;
1591
1592 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1593 bitmap_size, bitmap);
1594
1595 // don't forget to fill it with zeros!
1596 memset(bitmap, 0, bitmap_size);
1597
1598 // For each block in this step, point to its bitmap from the
1599 // block descriptor.
1600 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1601 bd->u.bitmap = bitmap;
1602 bitmap += BLOCK_SIZE_W / BITS_IN(W_);
1603
1604 // Also at this point we set the BF_MARKED flag
1605 // for this block. The invariant is that
1606 // BF_MARKED is always unset, except during GC
1607 // when it is set on those blocks which will be
1608 // compacted.
1609 if (!(bd->flags & BF_FRAGMENTED)) {
1610 bd->flags |= BF_MARKED;
1611 }
1612
1613 // BF_SWEPT should be marked only for blocks that are being
1614 // collected in sweep()
1615 bd->flags &= ~BF_SWEPT;
1616 }
1617 }
1618 }
1619 }
1620
1621 /* ----------------------------------------------------------------------------
1622 Initialise a generation that is *not* to be collected
1623 ------------------------------------------------------------------------- */
1624
1625 static void
1626 prepare_uncollected_gen (generation *gen)
1627 {
1628 uint32_t i;
1629
1630
1631 ASSERT(gen->no > 0);
1632
1633 // save the current mutable lists for this generation, and
1634 // allocate a fresh block for each one. We'll traverse these
1635 // mutable lists as roots early on in the GC.
1636 for (i = 0; i < n_capabilities; i++) {
1637 stash_mut_list(capabilities[i], gen->no);
1638 }
1639
1640 ASSERT(gen->scavenged_large_objects == NULL);
1641 ASSERT(gen->n_scavenged_large_blocks == 0);
1642 }
1643
1644 /* -----------------------------------------------------------------------------
1645 Collect the completed blocks from a GC thread and attach them to
1646 the generation.
1647 -------------------------------------------------------------------------- */
1648
1649 static void
1650 collect_gct_blocks (void)
1651 {
1652 uint32_t g;
1653 gen_workspace *ws;
1654 bdescr *bd, *prev;
1655
1656 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1657 ws = &gct->gens[g];
1658
1659 // there may still be a block attached to ws->todo_bd;
1660 // leave it there to use next time.
1661
1662 if (ws->scavd_list != NULL) {
1663 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1664
1665 ASSERT(gct->scan_bd == NULL);
1666 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1667
1668 prev = NULL;
1669 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1670 prev = bd;
1671 }
1672 if (prev != NULL) {
1673 prev->link = ws->gen->blocks;
1674 ws->gen->blocks = ws->scavd_list;
1675 }
1676 ws->gen->n_blocks += ws->n_scavd_blocks;
1677 ws->gen->n_words += ws->n_scavd_words;
1678
1679 ws->scavd_list = NULL;
1680 ws->n_scavd_blocks = 0;
1681 ws->n_scavd_words = 0;
1682
1683 RELEASE_SPIN_LOCK(&ws->gen->sync);
1684 }
1685 }
1686 }
1687
1688 /* -----------------------------------------------------------------------------
1689 During mutation, any blocks that are filled by allocatePinned() are stashed
1690 on the local pinned_object_blocks list, to avoid needing to take a global
1691 lock. Here we collect those blocks from the cap->pinned_object_blocks lists
1692 and put them on g0->large_objects or oldest_gen->large_objects.
1693
1694 How to decide which list to put them on?
1695
1696 - When the non-moving heap is enabled and this is a major GC, we put them on
1697 oldest_gen. This is because after preparation we really want no
1698 old-to-young references, and we want to be able to reset mut_lists. For
1699 this we need to promote every potentially live object to the oldest gen.
1700
1701 - Otherwise we put them on g0.
1702 -------------------------------------------------------------------------- */
1703
1704 static void
1705 collect_pinned_object_blocks (void)
1706 {
1707 const bool use_nonmoving = RtsFlags.GcFlags.useNonmoving;
1708 generation *const gen = (use_nonmoving && major_gc) ? oldest_gen : g0;
1709
1710 for (uint32_t n = 0; n < n_capabilities; n++) {
1711 bdescr *last = NULL;
1712 if (use_nonmoving && gen == oldest_gen) {
1713 // Mark objects as belonging to the nonmoving heap
1714 for (bdescr *bd = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks); bd != NULL; bd = bd->link) {
1715 bd->flags |= BF_NONMOVING;
1716 bd->gen = oldest_gen;
1717 bd->gen_no = oldest_gen->no;
1718 oldest_gen->n_large_words += bd->free - bd->start;
1719 oldest_gen->n_large_blocks += bd->blocks;
1720 last = bd;
1721 }
1722 } else {
1723 for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1724 last = bd;
1725 }
1726 }
1727
1728 if (last != NULL) {
1729 last->link = gen->large_objects;
1730 if (gen->large_objects != NULL) {
1731 gen->large_objects->u.back = last;
1732 }
1733 gen->large_objects = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks);
1734 RELAXED_STORE(&capabilities[n]->pinned_object_blocks, NULL);
1735 }
1736 }
1737 }
1738
1739 /* -----------------------------------------------------------------------------
1740 Initialise a gc_thread before GC
1741 -------------------------------------------------------------------------- */
1742
1743 static void
1744 init_gc_thread (gc_thread *t)
1745 {
1746 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1747 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1748 t->scan_bd = NULL;
1749 t->mut_lists = t->cap->mut_lists;
1750 t->evac_gen_no = 0;
1751 t->failed_to_evac = false;
1752 t->eager_promotion = true;
1753 t->thunk_selector_depth = 0;
1754 t->copied = 0;
1755 t->scanned = 0;
1756 t->any_work = 0;
1757 t->no_work = 0;
1758 t->scav_find_work = 0;
1759 }
1760
1761 /* -----------------------------------------------------------------------------
1762 Function we pass to evacuate roots.
1763 -------------------------------------------------------------------------- */
1764
1765 static void
mark_root(void * user USED_IF_THREADS,StgClosure ** root)1766 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1767 {
1768 // we stole a register for gct, but this function is called from
1769 // *outside* the GC where the register variable is not in effect,
1770 // so we need to save and restore it here. NB. only call
1771 // mark_root() from the main GC thread, otherwise gct will be
1772 // incorrect.
1773 #if defined(THREADED_RTS)
1774 gc_thread *saved_gct;
1775 saved_gct = gct;
1776 #endif
1777 SET_GCT(user);
1778
1779 evacuate(root);
1780
1781 SET_GCT(saved_gct);
1782 }

/* ----------------------------------------------------------------------------
   Reset the sizes of the older generations when we do a major
   collection.

   CURRENT STRATEGY: make all generations except zero the same size.
   We have to stay within the maximum heap size, and leave a certain
   percentage of the maximum heap size available to allocate into.
   ------------------------------------------------------------------------- */

void
resizeGenerations (void)
{
    uint32_t g;
    W_ live, size, min_alloc, words;
    const W_ max  = RtsFlags.GcFlags.maxHeapSize;
    const W_ gens = RtsFlags.GcFlags.generations;

    // live in the oldest generations
    if (oldest_gen->live_estimate != 0) {
        words = oldest_gen->live_estimate;
    } else {
        words = oldest_gen->n_words;
    }
    live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
        oldest_gen->n_large_blocks +
        oldest_gen->n_compact_blocks;

    // default max size for all generations except zero
    size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
                   RtsFlags.GcFlags.minOldGenSize);

    if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
        if (max > 0) {
            RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
        } else {
            RtsFlags.GcFlags.heapSizeSuggestion = size;
        }
    }

    // minimum size for generation zero
    min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
                        RtsFlags.GcFlags.minAllocAreaSize
                        * (W_)n_capabilities);
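    // For illustration only (example figures, not defaults): with a maximum
    // heap of 4096 blocks, pcFreeHeap = 10 and a single capability, this is
    // stg_max(10 * 4096 / 200, RtsFlags.GcFlags.minAllocAreaSize)
    //   = stg_max(204, RtsFlags.GcFlags.minAllocAreaSize) blocks.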

    // Auto-enable compaction when the residency reaches a
    // certain percentage of the maximum heap size (default: 30%).
    // Except when non-moving GC is enabled.
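    // For illustration: with the default compactThreshold of 30 and a
    // maximum heap of 10000 blocks, compaction switches on once the oldest
    // generation occupies more than 30 * 10000 / 100 = 3000 blocks
    // (example figures).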
    if (!RtsFlags.GcFlags.useNonmoving &&
        (RtsFlags.GcFlags.compact ||
         (max > 0 &&
          oldest_gen->n_blocks >
          (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
        oldest_gen->mark = 1;
        oldest_gen->compact = 1;
//        debugBelch("compaction: on\n", live);
    } else {
        oldest_gen->mark = 0;
        oldest_gen->compact = 0;
//        debugBelch("compaction: off\n", live);
    }

    if (RtsFlags.GcFlags.sweep) {
        oldest_gen->mark = 1;
    }

    // if we're going to go over the maximum heap size, reduce the
    // size of the generations accordingly.  The calculation is
    // different if compaction is turned on, because we don't need
    // to double the space required to collect the old generation.
    if (max != 0) {

        // this test is necessary to ensure that the calculations
        // below don't have any negative results - we're working
        // with unsigned values here.
        if (max < min_alloc) {
            heapOverflow();
        }

        if (oldest_gen->compact) {
            if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
                size = (max - min_alloc) / ((gens - 1) * 2 - 1);
            }
        } else {
            if ( (size * (gens - 1) * 2) + min_alloc > max ) {
                size = (max - min_alloc) / ((gens - 1) * 2);
            }
        }
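        // For illustration, assuming 3 generations and compaction off: the
        // constraint above is size*(3-1)*2 + min_alloc <= max, i.e. copy
        // space (2x) for each of the two older generations plus the
        // allocation area, so size is clamped to (max - min_alloc) / 4.
        // With compaction on, the oldest generation needs no copy space,
        // so roughly one factor of 'size' drops out of the divisor.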

        if (size < live) {
            heapOverflow();
        }
    }

#if 0
    debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
               min_alloc, size, max);
    debugBelch("resize_gen: n_blocks: %lu, n_large_block: %lu, n_compact_blocks: %lu\n",
               oldest_gen->n_blocks, oldest_gen->n_large_blocks, oldest_gen->n_compact_blocks);
    debugBelch("resize_gen: max_blocks: %lu -> %lu\n", oldest_gen->max_blocks, oldest_gen->n_blocks);
#endif

    for (g = 0; g < gens; g++) {
        generations[g].max_blocks = size;
    }
}

/* -----------------------------------------------------------------------------
   Calculate the new size of the nursery, and resize it.
   -------------------------------------------------------------------------- */

static void
resize_nursery (void)
{
    const StgWord min_nursery =
      RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;

    if (RtsFlags.GcFlags.generations == 1)
    {   // Two-space collector:
        W_ blocks;

        /* set up a new nursery.  Allocate a nursery size based on a
         * function of the amount of live data (by default a factor of 2)
         * Use the blocks from the old nursery if possible, freeing up any
         * left over blocks.
         *
         * If we get near the maximum heap size, then adjust our nursery
         * size accordingly.  If the nursery is the same size as the live
         * data (L), then we need 3L bytes.  We can reduce the size of the
         * nursery to bring the required memory down near 2L bytes.
         *
         * A normal 2-space collector would need 4L bytes to give the same
         * performance we get from 3L bytes, reducing to the same
         * performance at 2L bytes.
         */
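        /* For illustration: if maxHeapSize is 1000 blocks and the live data
         * occupies 400 blocks, the branch below trims the nursery to
         * 1000 - 2*400 = 200 blocks, so that the nursery plus the two
         * semispaces needed to copy the live data still fit within the
         * maximum heap size.  (Example figures only.)
         */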
        blocks = generations[0].n_blocks;

        if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
             blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
             RtsFlags.GcFlags.maxHeapSize )
        {
            long adjusted_blocks;  // signed on purpose
            int pc_free;

            adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);

            debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
                       RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);

            pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
            if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
            {
                heapOverflow();
            }
            blocks = adjusted_blocks;
        }
        else
        {
            blocks *= RtsFlags.GcFlags.oldGenFactor;
            if (blocks < min_nursery)
            {
                blocks = min_nursery;
            }
        }
        resizeNurseries(blocks);
    }
    else  // Generational collector
    {
        /*
         * If the user has given us a suggested heap size, adjust our
         * allocation area to make best use of the memory available.
         */
        if (RtsFlags.GcFlags.heapSizeSuggestion)
        {
            long blocks;
            StgWord needed;

            calcNeeded(false, &needed); // approx blocks needed at next GC

            /* Guess how much will be live in generation 0 step 0 next time.
             * A good approximation is obtained by finding the
             * percentage of g0 that was live at the last minor GC.
             *
             * We have an accurate figure for the amount of copied data in
             * 'copied', but we must convert this to a number of blocks, with
             * a small adjustment for estimated slop at the end of a block
             * (- 10 words).
             */
            if (N == 0)
            {
                g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
                    / countNurseryBlocks();
            }
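            // For illustration: if roughly 200 blocks' worth of data was
            // copied out of a 1000-block nursery at the last minor GC,
            // g0_pcnt_kept comes out at about 20 (example figures).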

            /* Estimate a size for the allocation area based on the
             * information available.  We might end up going slightly under
             * or over the suggested heap size, but we should be pretty
             * close on average.
             *
             * Formula:            suggested - needed
             *                ----------------------------
             *                    1 + g0_pcnt_kept/100
             *
             * where 'needed' is the amount of memory needed at the next
             * collection for collecting all gens except g0.
             */
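            /* For illustration: with a suggested heap of 10000 blocks,
             * 'needed' at 2000 blocks and g0_pcnt_kept = 25, the formula
             * gives (10000 - 2000) / 1.25 = 6400 blocks for the allocation
             * area (example figures only).
             */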
            blocks =
                (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
                (100 + (long)g0_pcnt_kept);

            if (blocks < (long)min_nursery) {
                blocks = min_nursery;
            }

            resizeNurseries((W_)blocks);
        }
        else
        {
            // we might have added extra blocks to the nursery, so
            // resize back to the original size again.
            resizeNurseriesFixed();
        }
    }
}

/* -----------------------------------------------------------------------------
   Sanity code for CAF garbage collection.

   With DEBUG turned on, we manage a CAF list in addition to the SRT
   mechanism.  After GC, we run down the CAF list and stub any CAFs that
   have been garbage collected with GCD_CAF, so we get an error whenever
   the program tries to enter a garbage-collected CAF.

   Any garbage-collected CAFs are taken off the CAF list at the same
   time.
   -------------------------------------------------------------------------- */

#if defined(DEBUG)

void gcCAFs(void)
{
    uint32_t i = 0;
    StgIndStatic *prev = NULL;

    for (StgIndStatic *p = debug_caf_list;
         p != (StgIndStatic*) END_OF_CAF_LIST;
         p = (StgIndStatic*) p->saved_info)
    {
        const StgInfoTable *info = get_itbl((StgClosure*)p);
        ASSERT(info->type == IND_STATIC);

        // See Note [STATIC_LINK fields] in Storage.h
        // This condition identifies CAFs that have just been GC'd;
        // CAFs whose static_link is 3 are ignored.
        if ((((StgWord)(p->static_link)&STATIC_BITS) | prev_static_flag) != 3) {
            debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
            SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
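            // Unlink p from debug_caf_list, which is threaded through the
            // saved_info fields of the CAFs on it.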
            if (prev == NULL) {
                debug_caf_list = (StgIndStatic*)p->saved_info;
            } else {
                prev->saved_info = p->saved_info;
            }
        } else {
            prev = p;
            i++;
        }
    }

    debugTrace(DEBUG_gccafs, "%d CAFs live", i);
}
#endif


/* -----------------------------------------------------------------------------
   The GC can leave some work for the mutator to do before the next
   GC, provided the work can be safely overlapped with mutation.  This
   can help reduce the GC pause time.

   The mutator can call doIdleGCWork() any time it likes, but
   preferably when it is idle.  It's safe for multiple capabilities to
   call doIdleGCWork().

   When 'all' is
     * false: doIdleGCWork() should only take a short, bounded amount
       of time.
     * true: doIdleGCWork() will complete all the outstanding GC work.

   The return value is
     * true if there's more to do (only if 'all' is false).
     * false otherwise.
   -------------------------------------------------------------------------- */

bool doIdleGCWork(Capability *cap STG_UNUSED, bool all)
{
    return runSomeFinalizers(all);
}
