1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team 1998-2008
4  *
5  * Generational garbage collector
6  *
7  * Documentation on the architecture of the Garbage Collector can be
8  * found in the online commentary:
9  *
10  *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage/gc
11  *
12  * ---------------------------------------------------------------------------*/
13 
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17 
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h"            // NB. before RtsSignals.h which
21                                 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30 
31 #include "Arena.h"
32 #include "Storage.h"
33 #include "RtsUtils.h"
34 #include "Apply.h"
35 #include "Updates.h"
36 #include "Stats.h"
37 #include "Schedule.h"
38 #include "Sanity.h"
39 #include "BlockAlloc.h"
40 #include "ProfHeap.h"
41 #include "Weak.h"
42 #include "Prelude.h"
43 #include "RtsSignals.h"
44 #include "STM.h"
45 #include "Trace.h"
46 #include "RetainerProfile.h"
47 #include "LdvProfile.h"
48 #include "RaiseAsync.h"
49 #include "StableName.h"
50 #include "StablePtr.h"
51 #include "CheckUnload.h"
52 #include "CNF.h"
53 #include "RtsFlags.h"
54 #include "NonMoving.h"
55 
56 #include <string.h> // for memset()
57 #include <unistd.h>
58 
59 /* -----------------------------------------------------------------------------
60    Global variables
61    -------------------------------------------------------------------------- */
62 
63 /* STATIC OBJECT LIST.
64  *
65  * During GC:
66  * We maintain a linked list of static objects that are still live.
67  * The requirements for this list are:
68  *
69  *  - we need to scan the list while adding to it, in order to
70  *    scavenge all the static objects (in the same way that
71  *    breadth-first scavenging works for dynamic objects).
72  *
73  *  - we need to be able to tell whether an object is already on
74  *    the list, to break loops.
75  *
76  * Each static object has a "static link field", which we use for
77  * linking objects on to the list.  We use a stack-type list, consing
78  * objects on the front as they are added (this means that the
79  * scavenge phase is depth-first, not breadth-first, but that
80  * shouldn't matter).
81  *
82  * A separate list is kept for objects that have been scavenged
83  * already - this is so that we can zero all the marks afterwards.
84  *
85  * An object is on the list if its static link field is non-zero; this
86  * means that we have to mark the end of the list with '1', not NULL.
87  *
88  * Extra notes for generational GC:
89  *
90  * Each generation has a static object list associated with it.  When
91  * collecting generations up to N, we treat the static object lists
92  * from generations > N as roots.
93  *
94  * We build up a static object list while collecting generations 0..N,
95  * which is then appended to the static object list of generation N+1.
96  *
97  * See also: Note [STATIC_LINK fields] in Storage.h.
98  */
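/* A minimal sketch of the list discipline described above (illustrative only;
 * the real code lives in evacuate_static_object() in Evac.c and
 * scavenge_static() in Scav.c, and uses the flag bits from
 * Note [STATIC_LINK fields] rather than a bare pointer).
 * STATIC_LINK_FIELD is a hypothetical accessor for an object's static link
 * field:
 *
 *     #define END_OF_STATIC_LIST ((StgClosure *)1)       // non-NULL sentinel
 *
 *     static void push_static_object (StgClosure **list, StgClosure *p)
 *     {
 *         StgClosure **link = STATIC_LINK_FIELD(p);      // hypothetical accessor
 *         if (*link != NULL) return;          // non-zero link => already listed
 *         *link = (*list == NULL) ? END_OF_STATIC_LIST : *list; // cons on front
 *         *list = p;
 *     }
 */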
99 
100 /* Hot GC globals
101  * ~~~~~~~~~~~~~~
102  * The globals below are quite hot during GC but read-only, initialized at
103  * the beginning of collection. It is important that they reside in the same
104  * cache line to minimize unnecessary cache misses.
105  */
106 
107 /* N is the oldest generation being collected, where the generations
108  * are numbered starting at 0.  A major GC (indicated by the major_gc
109  * flag) is when we're collecting all generations.  We only attempt to
110  * deal with static objects and GC CAFs when doing a major GC.
111  */
112 uint32_t N;
113 bool major_gc;
114 bool deadlock_detect_gc;
115 bool unload_mark_needed;
116 
117 /* Data used for allocation area sizing.
118  */
119 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
120 
121 /* Mut-list stats */
122 #if defined(DEBUG)
123 // For lack of a better option we protect mutlist_scav_stats with oldest_gen->sync
124 MutListScavStats mutlist_scav_stats;
125 #endif
126 
127 /* Thread-local data for each GC thread
128  */
129 gc_thread **gc_threads = NULL;
130 
131 #if !defined(THREADED_RTS)
132 // Must be aligned to 64 bytes to meet the stated 64-byte alignment of gen_workspace
133 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
134     ATTRIBUTE_ALIGNED(64);
135 #endif
136 
137 /* Note [n_gc_threads]
138 This is a global variable that originally tracked the number of threads
139 participating in the current GC. Its meaning has diverged from this somewhat.
140 In practice, it now takes one of the values {1, n_capabilities}. Don't be
141 tricked into thinking this means garbage collections must have 1 or
142 n_capabilities participants: idle capabilities (idle_cap[cap->no]) are
143 included in the n_gc_threads count.
144 
145 Clearly this is in need of some tidying up, but for now we tread carefully. We
146 check n_gc_threads > 1 to see whether we are in a parallel or sequential
147 collection. We ensure n_gc_threads > 1 before iterating over gc_threads, e.g.:
148 
149 for (i = 0; i < n_gc_threads; i++) { foo(gc_threads[i]); }
150 
151 Omitting this check has led to issues such as #19147.
152 */
153 uint32_t n_gc_threads;
154 
155 // For stats:
156 static long copied;        // *words* copied & scavenged during this GC
157 
158 #if defined(PROF_SPIN) && defined(THREADED_RTS)
159 // spin and yield counts for the quasi-SpinLock in waitForGcThreads
160 volatile StgWord64 waitForGcThreads_spin = 0;
161 volatile StgWord64 waitForGcThreads_yield = 0;
162 volatile StgWord64 whitehole_gc_spin = 0;
163 #endif
164 
165 bool work_stealing;
166 
167 uint32_t static_flag = STATIC_FLAG_B;
168 uint32_t prev_static_flag = STATIC_FLAG_A;
169 
170 DECLARE_GCT
171 
172 /* -----------------------------------------------------------------------------
173    Static function declarations
174    -------------------------------------------------------------------------- */
175 
176 static void mark_root               (void *user, StgClosure **root);
177 static void prepare_collected_gen   (generation *gen);
178 static void prepare_uncollected_gen (generation *gen);
179 static void init_gc_thread          (gc_thread *t);
180 static void resize_nursery          (void);
181 static void start_gc_threads        (void);
182 static void scavenge_until_all_done (void);
183 static StgWord inc_running          (void);
184 static StgWord dec_running          (void);
185 static void wakeup_gc_threads       (uint32_t me, bool idle_cap[]);
186 static void shutdown_gc_threads     (uint32_t me, bool idle_cap[]);
187 static void collect_gct_blocks      (void);
188 static void collect_pinned_object_blocks (void);
189 static void heapOverflow            (void);
190 
191 #if defined(DEBUG)
192 static void gcCAFs                  (void);
193 #endif
194 
195 /* -----------------------------------------------------------------------------
196    The mark stack.
197    -------------------------------------------------------------------------- */
198 
199 bdescr *mark_stack_top_bd; // topmost block in the mark stack
200 bdescr *mark_stack_bd;     // current block in the mark stack
201 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
202 
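/* A simplified sketch of how the stack operates within the current block
 * (illustrative only; the real push_mark_stack()/pop_mark_stack() in
 * MarkStack.h additionally chain on a fresh block when the current one fills
 * up, and walk back via mark_stack_bd->u.back when it empties):
 *
 *     static void push_mark_stack_sketch (StgPtr p)
 *     {
 *         ASSERT(mark_sp < mark_stack_bd->start + BLOCK_SIZE_W); // no overflow handling here
 *         *mark_sp++ = (StgWord)p;
 *     }
 *
 *     static StgPtr pop_mark_stack_sketch (void)
 *     {
 *         ASSERT(mark_sp > mark_stack_bd->start);                // no underflow handling here
 *         return (StgPtr)*--mark_sp;
 *     }
 */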
203 
204 /* -----------------------------------------------------------------------------
205    Statistics from mut_list scavenging
206    -------------------------------------------------------------------------- */
207 
208 #if defined(DEBUG)
209 void
210 zeroMutListScavStats(MutListScavStats *src)
211 {
212     memset(src, 0, sizeof(MutListScavStats));
213 }
214 
215 void
216 addMutListScavStats(const MutListScavStats *src,
217                     MutListScavStats *dest)
218 {
219 #define ADD_STATS(field) dest->field += src->field;
220     ADD_STATS(n_MUTVAR);
221     ADD_STATS(n_MUTARR);
222     ADD_STATS(n_MVAR);
223     ADD_STATS(n_TVAR);
224     ADD_STATS(n_TREC_CHUNK);
225     ADD_STATS(n_TVAR_WATCH_QUEUE);
226     ADD_STATS(n_TREC_HEADER);
227     ADD_STATS(n_OTHERS);
228 #undef ADD_STATS
229 }
230 #endif /* DEBUG */
231 
232 
233 /* -----------------------------------------------------------------------------
234    GarbageCollect: the main entry point to the garbage collector.
235 
236    The collect_gen parameter is obtained by calling calcNeeded().
237 
238    Locks held: all capabilities are held throughout GarbageCollect().
239    -------------------------------------------------------------------------- */
240 
241 void
242 GarbageCollect (uint32_t collect_gen,
243                 const bool do_heap_census,
244                 const bool deadlock_detect,
245                 uint32_t gc_type USED_IF_THREADS,
246                 Capability *cap,
247                 bool idle_cap[])
248 {
249   bdescr *bd;
250   generation *gen;
251   StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
252       gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
253       any_work, no_work, scav_find_work;
254 #if defined(THREADED_RTS)
255   gc_thread *saved_gct;
256 #endif
257   uint32_t g, n;
258   // The time we should report our heap census as occurring at, if necessary.
259   Time mut_time = 0;
260 
261   if (do_heap_census) {
262       RTSStats stats;
263       getRTSStats(&stats);
264       mut_time = stats.mutator_cpu_ns;
265   }
266 
267   // necessary if we stole a callee-saves register for gct:
268 #if defined(THREADED_RTS)
269   saved_gct = gct;
270 #endif
271 
272 #if defined(PROFILING)
273   CostCentreStack *save_CCS[n_capabilities];
274 #endif
275 
276   ACQUIRE_SM_LOCK;
277 
278 #if defined(RTS_USER_SIGNALS)
279   if (RtsFlags.MiscFlags.install_signal_handlers) {
280     // block signals
281     blockUserSignals();
282   }
283 #endif
284 
285   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
286   // otherwise adjust the padding in gen_workspace.
287 
288   // this is the main thread
289   SET_GCT(gc_threads[cap->no]);
290 
291   // tell the stats department that we've started a GC
292   stat_startGC(cap, gct);
293 
294   // Lock the StablePtr table. This prevents FFI calls manipulating
295   // the table from occurring during GC.
296   stablePtrLock();
297 
298 #if defined(DEBUG)
299   zeroMutListScavStats(&mutlist_scav_stats);
300 #endif
301 
302   // attribute any costs to CCS_GC
303 #if defined(PROFILING)
304   for (n = 0; n < n_capabilities; n++) {
305       save_CCS[n] = capabilities[n]->r.rCCCS;
306       capabilities[n]->r.rCCCS = CCS_GC;
307   }
308 #endif
309 
310   /* Figure out which generation to collect
311    */
312   N = collect_gen;
313   major_gc = (N == RtsFlags.GcFlags.generations-1);
314 
315   /* See Note [Deadlock detection under nonmoving collector]. */
316   deadlock_detect_gc = deadlock_detect;
317 
318 #if defined(THREADED_RTS)
319   if (major_gc && RtsFlags.GcFlags.useNonmoving && concurrent_coll_running) {
320       /* If there is already a concurrent major collection running then
321        * there is no benefit to starting another.
322        * TODO: Catch heap-size runaway.
323        */
324       N--;
325       collect_gen--;
326       major_gc = false;
327   }
328 #endif
329 
330   /* N.B. The nonmoving collector works a bit differently. See
331    * Note [Static objects under the nonmoving collector].
332    */
333   if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
334       prev_static_flag = static_flag;
335       static_flag =
336           static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
337   }
338 
339   if (major_gc) {
340       unload_mark_needed = prepareUnloadCheck();
341   } else {
342       unload_mark_needed = false;
343   }
344 
345 #if defined(THREADED_RTS)
346   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
347                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
348       // It's not always a good idea to do load balancing in parallel
349       // GC.  In particular, for a parallel program we don't want to
350       // lose locality by moving cached data into another CPU's cache
351       // (this effect can be quite significant).
352       //
353       // We could have a more complex way to determine whether to do
354       // work stealing or not, e.g. it might be a good idea to do it
355       // if the heap is big.  For now, we just turn it on or off with
356       // a flag.
357 #endif
358 
359   /* Start threads, so they can be spinning up while we finish initialisation.
360    */
361   start_gc_threads();
362 
363 #if defined(THREADED_RTS)
364   /* How many threads will be participating in this GC?
365    * We don't try to parallelise minor GCs (unless the user asks for
366    * it with +RTS -gn0), or mark/compact/sweep GC.
367    */
368   if (gc_type == SYNC_GC_PAR) {
369       n_gc_threads = n_capabilities;
370   } else {
371       n_gc_threads = 1;
372   }
373 #else
374   n_gc_threads = 1;
375 #endif
376 
377   debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
378              N, n_gc_threads);
379 
380 #if defined(DEBUG)
381   // check for memory leaks if DEBUG is on
382   memInventory(DEBUG_gc);
383 #endif
384 
385   // do this *before* we start scavenging
386   collectFreshWeakPtrs();
387 
388   // check sanity *before* GC
389   IF_DEBUG(sanity, checkSanity(false /* before GC */, major_gc));
390 
391   // gather blocks allocated using allocatePinned() from each capability
392   // and put them on the g0->large_object list.
393   collect_pinned_object_blocks();
394 
395   // Initialise all the generations that we're collecting.
396   for (g = 0; g <= N; g++) {
397       prepare_collected_gen(&generations[g]);
398   }
399   // Initialise all the generations that we're *not* collecting.
400   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
401       prepare_uncollected_gen(&generations[g]);
402   }
403 
404   // Prepare this gc_thread
405   init_gc_thread(gct);
406 
407   /* Allocate a mark stack if we're doing a major collection.
408    */
409   if (major_gc && oldest_gen->mark) {
410       mark_stack_bd     = allocBlock();
411       mark_stack_top_bd = mark_stack_bd;
412       mark_stack_bd->link = NULL;
413       mark_stack_bd->u.back = NULL;
414       mark_sp           = mark_stack_bd->start;
415   } else {
416       mark_stack_bd     = NULL;
417       mark_stack_top_bd = NULL;
418       mark_sp           = NULL;
419   }
420 
421   /* -----------------------------------------------------------------------
422    * follow all the roots that we know about:
423    */
424 
425   // the main thread is running: this prevents any other threads from
426   // exiting prematurely, so we can start them now.
427   // NB. do this after the mutable lists have been saved above, otherwise
428   // the other GC threads will be writing into the old mutable lists.
429   inc_running();
430   wakeup_gc_threads(gct->thread_index, idle_cap);
431 
432   traceEventGcWork(gct->cap);
433 
434   // scavenge the capability-private mutable lists.  This isn't part
435   // of markSomeCapabilities() because markSomeCapabilities() can only
436   // call back into the GC via mark_root() (due to the gct register
437   // variable).
438   if (n_gc_threads == 1) {
439       for (n = 0; n < n_capabilities; n++) {
440 #if defined(THREADED_RTS)
441           scavenge_capability_mut_Lists1(capabilities[n]);
442 #else
443           scavenge_capability_mut_lists(capabilities[n]);
444 #endif
445       }
446   } else {
447       scavenge_capability_mut_lists(gct->cap);
448       for (n = 0; n < n_capabilities; n++) {
449           if (idle_cap[n]) {
450               markCapability(mark_root, gct, capabilities[n],
451                              true/*don't mark sparks*/);
452               scavenge_capability_mut_lists(capabilities[n]);
453           }
454       }
455   }
456 
457   // follow roots from the CAF list (used by GHCi)
458   gct->evac_gen_no = 0;
459   markCAFs(mark_root, gct);
460 
461   // follow all the roots that the application knows about.
462   gct->evac_gen_no = 0;
463   if (n_gc_threads == 1) {
464       for (n = 0; n < n_capabilities; n++) {
465           markCapability(mark_root, gct, capabilities[n],
466                          true/*don't mark sparks*/);
467       }
468   } else {
469       markCapability(mark_root, gct, cap, true/*don't mark sparks*/);
470   }
471 
472   markScheduler(mark_root, gct);
473 
474   // Mark the weak pointer list, and prepare to detect dead weak pointers.
475   markWeakPtrList();
476   initWeakForGC();
477 
478   // Mark the stable pointer table.
479   markStablePtrTable(mark_root, gct);
480 
481   // Remember old stable name addresses.
482   rememberOldStableNameAddresses ();
483 
484   /* -------------------------------------------------------------------------
485    * Repeatedly scavenge all the areas we know about until there's no
486    * more scavenging to be done.
487    */
488 
489   StgWeak *dead_weak_ptr_list = NULL;
490   StgTSO *resurrected_threads = END_TSO_QUEUE;
491 
492   for (;;)
493   {
494       scavenge_until_all_done();
495 
496       // The other threads are now stopped.  We might recurse back to
497       // here, but from now on this is the only thread.
498 
499       // must be last...  invariant is that everything is fully
500       // scavenged at this point.
501       if (traverseWeakPtrList(&dead_weak_ptr_list, &resurrected_threads)) { // returns true if evaced something
502           inc_running();
503           continue;
504       }
505 
506       // If we get to here, there's really nothing left to do.
507       break;
508   }
509 
510   shutdown_gc_threads(gct->thread_index, idle_cap);
511 
512   // Now see which stable names are still alive.
513   gcStableNameTable();
514 
515 #if defined(THREADED_RTS)
516   if (n_gc_threads == 1) {
517       for (n = 0; n < n_capabilities; n++) {
518           pruneSparkQueue(false, capabilities[n]);
519       }
520   } else {
521       for (n = 0; n < n_capabilities; n++) {
522           if (n == cap->no || idle_cap[n]) {
523               pruneSparkQueue(false, capabilities[n]);
524          }
525       }
526   }
527 #endif
528 
529 #if defined(PROFILING)
530   // We call processHeapClosureForDead() on every closure destroyed during
531   // the current garbage collection, so we invoke LdvCensusForDead().
532   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
533       || RtsFlags.ProfFlags.bioSelector != NULL) {
534       RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
535       LdvCensusForDead(N);
536       ACQUIRE_SM_LOCK;
537   }
538 #endif
539 
540   // NO MORE EVACUATION AFTER THIS POINT!
541 
542   // Finally: compact or sweep the oldest generation.
543   if (major_gc && oldest_gen->mark) {
544       if (oldest_gen->compact)
545           compact(gct->scavenged_static_objects,
546                   &dead_weak_ptr_list,
547                   &resurrected_threads);
548       else
549           sweep(oldest_gen);
550   }
551 
552   copied = 0;
553   par_max_copied = 0;
554   par_balanced_copied = 0;
555   gc_spin_spin = 0;
556   gc_spin_yield = 0;
557   mut_spin_spin = 0;
558   mut_spin_yield = 0;
559   any_work = 0;
560   no_work = 0;
561   scav_find_work = 0;
562   {
563       uint32_t i;
564       uint64_t par_balanced_copied_acc = 0;
565       const gc_thread* thread;
566 
567       // see Note [n_gc_threads]
568       if (n_gc_threads > 1) {
569           for (i=0; i < n_gc_threads; i++) {
570               copied += RELAXED_LOAD(&gc_threads[i]->copied);
571           }
572           for (i=0; i < n_gc_threads; i++) {
573               thread = gc_threads[i];
574               debugTrace(DEBUG_gc,"thread %d:", i);
575               debugTrace(DEBUG_gc,"   copied           %ld",
576                          RELAXED_LOAD(&thread->copied) * sizeof(W_));
577               debugTrace(DEBUG_gc,"   scanned          %ld",
578                          RELAXED_LOAD(&thread->scanned) * sizeof(W_));
579               debugTrace(DEBUG_gc,"   any_work         %ld",
580                          RELAXED_LOAD(&thread->any_work));
581               debugTrace(DEBUG_gc,"   no_work          %ld",
582                          RELAXED_LOAD(&thread->no_work));
583               debugTrace(DEBUG_gc,"   scav_find_work %ld",
584                          RELAXED_LOAD(&thread->scav_find_work));
585 
586 #if defined(THREADED_RTS) && defined(PROF_SPIN)
587               gc_spin_spin += RELAXED_LOAD(&thread->gc_spin.spin);
588               gc_spin_yield += RELAXED_LOAD(&thread->gc_spin.yield);
589               mut_spin_spin += RELAXED_LOAD(&thread->mut_spin.spin);
590               mut_spin_yield += RELAXED_LOAD(&thread->mut_spin.yield);
591 #endif
592 
593               any_work += RELAXED_LOAD(&thread->any_work);
594               no_work += RELAXED_LOAD(&thread->no_work);
595               scav_find_work += RELAXED_LOAD(&thread->scav_find_work);
596 
597               par_max_copied = stg_max(RELAXED_LOAD(&thread->copied), par_max_copied);
598               par_balanced_copied_acc +=
599                   stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);
600           }
601           // See Note [Work Balance] for an explanation of this computation
602           par_balanced_copied =
603               (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
604               (n_gc_threads - 1);
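          // Worked example (assuming two GC threads): if thread A copies 60
          // words and thread B copies 40, then copied = 100 and
          // par_balanced_copied_acc = min(2*60,100) + min(2*40,100) = 180,
          // giving par_balanced_copied = (180 - 100 + 0) / 1 = 80.  A
          // perfectly balanced GC (50/50) yields copied (100); a completely
          // unbalanced one (100/0) yields 0.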
605       } else {
606           copied += gct->copied;
607       }
608   }
609 
610   // Run through all the generations and tidy up.
611   // We're going to:
612   //   - count the amount of "live" data (live_words, live_blocks)
613   //   - count the amount of "copied" data in this GC (copied)
614   //   - free from-space
615   //   - make to-space the new from-space (set BF_EVACUATED on all blocks)
616   //
617   live_words = 0;
618   live_blocks = 0;
619 
620   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
621 
622     if (g == N) {
623       generations[g].collections++; // for stats
624       if (n_gc_threads > 1) generations[g].par_collections++;
625     }
626 
627     // Count the mutable list as bytes "copied" for the purposes of
628     // stats.  Every mutable list is copied during every GC.
629     if (g > 0) {
630         W_ mut_list_size = 0;
631         for (n = 0; n < n_capabilities; n++) {
632             mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
633         }
634         copied +=  mut_list_size;
635 
636         debugTrace(DEBUG_gc,
637                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d others)",
638                    (unsigned long)(mut_list_size * sizeof(W_)),
639                    mutlist_scav_stats.n_MUTVAR,
640                    mutlist_scav_stats.n_MUTARR,
641                    mutlist_scav_stats.n_MVAR,
642                    mutlist_scav_stats.n_TVAR,
643                    mutlist_scav_stats.n_TVAR_WATCH_QUEUE,
644                    mutlist_scav_stats.n_TREC_CHUNK,
645                    mutlist_scav_stats.n_TREC_HEADER,
646                    mutlist_scav_stats.n_OTHERS);
647     }
648 
649     bdescr *next, *prev;
650     gen = &generations[g];
651 
652     // for generations we collected...
653     if (g <= N && !(RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
654 
655         /* free old memory and shift to-space into from-space for all
656          * the collected generations (except the allocation area).  These
657          * freed blocks will probably be quickly recycled.
658          */
659         if (gen->mark)
660         {
661             // tack the new blocks on the end of the existing blocks
662             if (gen->old_blocks != NULL) {
663 
664                 prev = NULL;
665                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
666 
667                     next = bd->link;
668 
669                     if (!(bd->flags & BF_MARKED))
670                     {
671                         if (prev == NULL) {
672                             gen->old_blocks = next;
673                         } else {
674                             prev->link = next;
675                         }
676                         freeGroup(bd);
677                         gen->n_old_blocks--;
678                     }
679                     else
680                     {
681                         gen->n_words += bd->free - bd->start;
682 
683                         // NB. this step might not be compacted next
684                         // time, so reset the BF_MARKED flags.
685                         // They are set before GC if we're going to
686                         // compact.  (search for BF_MARKED above).
687                         bd->flags &= ~BF_MARKED;
688 
689                         // between GCs, all blocks in the heap except
690                         // for the nursery have the BF_EVACUATED flag set.
691                         bd->flags |= BF_EVACUATED;
692 
693                         prev = bd;
694                     }
695                 }
696 
697                 if (prev != NULL) {
698                     prev->link = gen->blocks;
699                     gen->blocks = gen->old_blocks;
700                 }
701             }
702             // add the new blocks to the block tally
703             gen->n_blocks += gen->n_old_blocks;
704             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
705             ASSERT(countOccupied(gen->blocks) == gen->n_words);
706         }
707         else // not compacted
708         {
709             freeChain(gen->old_blocks);
710         }
711 
712         gen->old_blocks = NULL;
713         gen->n_old_blocks = 0;
714 
715         /* LARGE OBJECTS.  The current live large objects are chained on
716          * scavenged_large, having been moved during garbage
717          * collection from large_objects.  Any objects left on the
718          * large_objects list are therefore dead, so we free them here.
719          */
720         freeChain(gen->large_objects);
721         gen->large_objects  = gen->scavenged_large_objects;
722         gen->n_large_blocks = gen->n_scavenged_large_blocks;
723         gen->n_large_words  = countOccupied(gen->large_objects);
724         gen->n_new_large_words = 0;
725 
726         /* COMPACT_NFDATA. The currently live compacts are chained
727          * to live_compact_objects, quite like large objects. And
728          * objects left on the compact_objects list are dead.
729          *
730          * We don't run a simple freeChain because we want to give the
731          * CNF module some chance to free memory that freeChain would
732          * not see (namely blocks appended to a CNF through a compactResize).
733          *
734          * See Note [Compact Normal Forms] for details.
735          */
736         for (bd = gen->compact_objects; bd; bd = next) {
737             next = bd->link;
738             compactFree(((StgCompactNFDataBlock*)bd->start)->owner);
739         }
740         gen->compact_objects = gen->live_compact_objects;
741         gen->n_compact_blocks = gen->n_live_compact_blocks;
742     }
743     else // for generations > N
744     {
745         /* For older generations, we need to append the
746          * scavenged_large_object list (i.e. large objects that have been
747          * promoted during this GC) to the large_object list for that step.
748          */
749         for (bd = gen->scavenged_large_objects; bd; bd = next) {
750             next = bd->link;
751             dbl_link_onto(bd, &gen->large_objects);
752             gen->n_large_words += bd->free - bd->start;
753         }
754 
755         // And same for compacts
756         for (bd = gen->live_compact_objects; bd; bd = next) {
757             next = bd->link;
758             dbl_link_onto(bd, &gen->compact_objects);
759         }
760 
761         // add the new blocks we promoted during this GC
762         gen->n_large_blocks += gen->n_scavenged_large_blocks;
763         gen->n_compact_blocks += gen->n_live_compact_blocks;
764     }
765 
766     ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
767     ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
768     // We can't run the same assertions on compact objects because there
769     // is memory "the GC doesn't see" (directly), but which is still
770     // accounted for in gen->n_compact_blocks
771 
772     gen->scavenged_large_objects = NULL;
773     gen->n_scavenged_large_blocks = 0;
774     gen->live_compact_objects = NULL;
775     gen->n_live_compact_blocks = 0;
776 
777     // Count "live" data
778     live_words  += genLiveWords(gen);
779     live_blocks += genLiveBlocks(gen);
780 
781     // add in the partial blocks in the gen_workspaces
782     {
783         uint32_t i;
784         for (i = 0; i < n_capabilities; i++) {
785             live_words  += gcThreadLiveWords(i, gen->no);
786             live_blocks += gcThreadLiveBlocks(i, gen->no);
787         }
788     }
789   } // for all generations
790 
791   // Flush the update remembered sets. See Note [Eager update remembered set
792   // flushing] in NonMovingMark.c
793   if (RtsFlags.GcFlags.useNonmoving) {
794       RELEASE_SM_LOCK;
795       for (n = 0; n < n_capabilities; n++) {
796           nonmovingAddUpdRemSetBlocks(&capabilities[n]->upd_rem_set.queue);
797       }
798       ACQUIRE_SM_LOCK;
799   }
800 
801   // Mark and sweep the oldest generation.
802   // N.B. This can only happen after we've moved
803   // oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
804   ASSERT(oldest_gen->scavenged_large_objects == NULL);
805   if (RtsFlags.GcFlags.useNonmoving && major_gc) {
806       // All threads in the non-moving heap should be found to be alive, because
807       // threads in the non-moving generation's list should live in the
808       // non-moving heap, and we consider non-moving objects alive during
809       // preparation.
810       ASSERT(oldest_gen->old_threads == END_TSO_QUEUE);
811       // For weaks, remember that we evacuated all weaks to the non-moving heap
812       // in markWeakPtrList(), and then moved the weak_ptr_list list to
813       // old_weak_ptr_list. We then moved weaks with live keys to the
814       // weak_ptr_list again. Then, in collectDeadWeakPtrs() we moved weaks in
815       // old_weak_ptr_list to dead_weak_ptr_list. So at this point
816       // old_weak_ptr_list should be empty.
817       ASSERT(oldest_gen->old_weak_ptr_list == NULL);
818 
819       // we may need to take the lock to allocate mark queue blocks
820       RELEASE_SM_LOCK;
821       // dead_weak_ptr_list contains weak pointers with dead keys. Those need to
822       // be kept alive because we'll use them in finalizeSchedulers(). Similarly
823       // resurrected_threads are also going to be used in resurrectedThreads()
824       // so we need to mark those too.
825       // Note that in sequential case these lists will be appended with more
826       // weaks and threads found to be dead in mark.
827 #if !defined(THREADED_RTS)
828       // In the non-threaded runtime this is the only time we push to the
829       // upd_rem_set
830       nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
831 #endif
832       nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
833       ACQUIRE_SM_LOCK;
834   }
835 
836   // Update the max size of older generations after a major GC:
837   // We can't resize here in the case of the concurrent collector since we
838   // don't yet know how much live data we have. This will instead be done
839   // once we finish marking.
840   if (major_gc && RtsFlags.GcFlags.generations > 1 && ! RtsFlags.GcFlags.useNonmoving)
841       resizeGenerations();
842 
843   // Free the mark stack.
844   if (mark_stack_top_bd != NULL) {
845       debugTrace(DEBUG_gc, "mark stack: %d blocks",
846                  countBlocks(mark_stack_top_bd));
847       freeChain(mark_stack_top_bd);
848   }
849 
850   // Free any bitmaps.
851   for (g = 0; g <= N; g++) {
852       gen = &generations[g];
853       if (gen->bitmap != NULL) {
854           freeGroup(gen->bitmap);
855           gen->bitmap = NULL;
856       }
857   }
858 
859   resize_nursery();
860 
861   resetNurseries();
862 
863 #if defined(DEBUG)
864   // Mark the garbage collected CAFs as dead. Done in `nonmovingGcCafs()` when
865   // non-moving GC is enabled.
866   if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
867       gcCAFs();
868   }
869 #endif
870 
871   // Update the stable name hash table
872   updateStableNameTable(major_gc);
873 
874   // unlock the StablePtr table.  Must be before scheduleFinalizers(),
875   // because a finalizer may call hs_free_fun_ptr() or
876   // hs_free_stable_ptr(), both of which access the StablePtr table.
877   stablePtrUnlock();
878 
879   // Unload dynamically-loaded object code after a major GC.
880   // See Note [Object unloading] in CheckUnload.c for details.
881   //
882   // TODO: Similar to `nonmovingGcCafs` non-moving GC should have its own
883   // collector for these objects, but that's currently not implemented, so we
884   // simply don't unload object code when non-moving GC is enabled.
885   if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
886       checkUnload();
887   }
888 
889 #if defined(PROFILING)
890   // resetStaticObjectForProfiling() must be called before
891   // zeroing below.
892 
893   // ToDo: fix the gct->scavenged_static_objects below
894   resetStaticObjectForProfiling(gct->scavenged_static_objects);
895 #endif
896 
897   // Start any pending finalizers.  Must be after
898   // updateStableTables() and stableUnlock() (see #4221).
899   RELEASE_SM_LOCK;
900   scheduleFinalizers(cap, dead_weak_ptr_list);
901   ACQUIRE_SM_LOCK;
902 
903   // check sanity after GC
904   // before resurrectThreads(), because that might overwrite some
905   // closures, which will cause problems with THREADED where we don't
906   // fill slop. If we are using the nonmoving collector then we can't claim to
907   // be *after* the major GC; it's now running concurrently.
908   IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc && !RtsFlags.GcFlags.useNonmoving));
909 
910   // If a heap census is due, we need to do it before
911   // resurrectThreads(), for the same reason as checkSanity above:
912   // resurrectThreads() will overwrite some closures and leave slop
913   // behind.
914   if (do_heap_census) {
915       debugTrace(DEBUG_sched, "performing heap census");
916       RELEASE_SM_LOCK;
917       heapCensus(mut_time);
918       ACQUIRE_SM_LOCK;
919   }
920 
921   // send exceptions to any threads which were about to die
922   RELEASE_SM_LOCK;
923   resurrectThreads(resurrected_threads);
924   ACQUIRE_SM_LOCK;
925 
926   if (major_gc) {
927       W_ need_prealloc, need_live, need, got;
928       uint32_t i;
929 
930       need_live = 0;
931       for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
932           need_live += genLiveBlocks(&generations[i]);
933       }
934       need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
935 
936       need_prealloc = 0;
937       for (i = 0; i < n_nurseries; i++) {
938           need_prealloc += nurseries[i].n_blocks;
939       }
940       need_prealloc += RtsFlags.GcFlags.largeAllocLim;
941       need_prealloc += countAllocdBlocks(exec_block);
942       need_prealloc += arenaBlocks();
943 #if defined(PROFILING)
944       if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
945           need_prealloc = retainerStackBlocks();
946       }
947 #endif
948 
949       /* If the amount of data remains constant, next major GC we'll
950        * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
951        * in order to reduce repeated deallocation and reallocation. #14702
952        */
953       need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
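      // Worked example (assuming the default -F2, i.e. oldGenFactor == 2):
      // with need_live = 1000 blocks and need_prealloc = 200 blocks this
      // gives need = 200 + (2 + 2) * 1000 = 4200 blocks, before the
      // heapSizeSuggestion/maxHeapSize clamps below and the conversion to
      // megablocks.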
954 
955       /* Also, if user set heap size, do not drop below it.
956        */
957       need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
958 
959       /* But with a large nursery, the above estimate might exceed
960        * maxHeapSize.  A large resident set size might make the OS
961        * kill this process, or swap unnecessarily.  Therefore we
962        * ensure that our estimate does not exceed maxHeapSize.
963        */
964       if (RtsFlags.GcFlags.maxHeapSize != 0) {
965           need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
966       }
967 
968       need = BLOCKS_TO_MBLOCKS(need);
969 
970       got = mblocks_allocated;
971 
972       if (got > need) {
973           returnMemoryToOS(got - need);
974       }
975   }
976 
977   // extra GC trace info
978   IF_DEBUG(gc, statDescribeGens());
979 
980 #if defined(DEBUG)
981   // symbol-table based profiling
982   /*  heapCensus(to_blocks); */ /* ToDo */
983 #endif
984 
985   // restore enclosing cost centre
986 #if defined(PROFILING)
987   for (n = 0; n < n_capabilities; n++) {
988       capabilities[n]->r.rCCCS = save_CCS[n];
989   }
990 #endif
991 
992 #if defined(DEBUG)
993   // check for memory leaks if DEBUG is on
994   memInventory(DEBUG_gc);
995 #endif
996 
997   // ok, GC over: tell the stats department what happened.
998   stat_endGCWorker(cap, gct);
999   stat_endGC(cap, gct, live_words, copied,
1000              live_blocks * BLOCK_SIZE_W - live_words /* slop */,
1001              N, n_gc_threads, gc_threads,
1002              par_max_copied, par_balanced_copied,
1003              gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
1004              any_work, no_work, scav_find_work);
1005 
1006 #if defined(RTS_USER_SIGNALS)
1007   if (RtsFlags.MiscFlags.install_signal_handlers) {
1008     // unblock signals again
1009     unblockUserSignals();
1010   }
1011 #endif
1012 
1013   RELEASE_SM_LOCK;
1014 
1015   SET_GCT(saved_gct);
1016 }
1017 
1018 /* -----------------------------------------------------------------------------
1019    Heap overflow is indicated by setting a flag that the caller of
1020    GarbageCollect can check.  (not ideal, TODO: better)
1021    -------------------------------------------------------------------------- */
1022 
1023 static void heapOverflow(void)
1024 {
1025     heap_overflow = true;
1026 }
1027 
1028 /* -----------------------------------------------------------------------------
1029    Initialise the gc_thread structures.
1030    -------------------------------------------------------------------------- */
1031 
1032 static void
1033 new_gc_thread (uint32_t n, gc_thread *t)
1034 {
1035     uint32_t g;
1036     gen_workspace *ws;
1037 
1038     t->cap = capabilities[n];
1039 
1040 #if defined(THREADED_RTS)
1041     t->id = 0;
1042     initSpinLock(&t->gc_spin);
1043     initSpinLock(&t->mut_spin);
1044     ACQUIRE_SPIN_LOCK(&t->gc_spin);
1045     ACQUIRE_SPIN_LOCK(&t->mut_spin);
1046     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
1047                           // thread to start up, see wakeup_gc_threads
1048 #endif
1049 
1050     t->thread_index = n;
1051     t->free_blocks = NULL;
1052     t->gc_count = 0;
1053 
1054     init_gc_thread(t);
1055 
1056     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
1057     {
1058         ws = &t->gens[g];
1059         ws->gen = &generations[g];
1060         ASSERT(g == ws->gen->no);
1061         ws->my_gct = t;
1062 
1063         // We want to call
1064         //   alloc_todo_block(ws,0);
1065         // but can't, because it uses gct which isn't set up at this point.
1066         // Hence, allocate a block for todo_bd manually:
1067         {
1068             bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
1069                 // no lock, locks aren't initialised yet
1070             initBdescr(bd, ws->gen, ws->gen->to);
1071             bd->flags = BF_EVACUATED;
1072             bd->u.scan = bd->free = bd->start;
1073 
1074             ws->todo_bd = bd;
1075             ws->todo_free = bd->free;
1076             ws->todo_lim = bd->start + BLOCK_SIZE_W;
1077         }
1078 
1079         ws->todo_q = newWSDeque(128);
1080         ws->todo_overflow = NULL;
1081         ws->n_todo_overflow = 0;
1082         ws->todo_large_objects = NULL;
1083         ws->todo_seg = END_NONMOVING_TODO_LIST;
1084 
1085         ws->part_list = NULL;
1086         ws->n_part_blocks = 0;
1087         ws->n_part_words = 0;
1088 
1089         ws->scavd_list = NULL;
1090         ws->n_scavd_blocks = 0;
1091         ws->n_scavd_words = 0;
1092     }
1093 }
1094 
1095 
1096 void
1097 initGcThreads (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
1098 {
1099 #if defined(THREADED_RTS)
1100     uint32_t i;
1101 
1102     if (from > 0) {
1103         gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
1104                                       "initGcThreads");
1105     } else {
1106         gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
1107                                      "initGcThreads");
1108     }
1109 
1110     for (i = from; i < to; i++) {
1111         gc_threads[i] =
1112             stgMallocBytes(sizeof(gc_thread) +
1113                            RtsFlags.GcFlags.generations * sizeof(gen_workspace),
1114                            "alloc_gc_threads");
1115 
1116         new_gc_thread(i, gc_threads[i]);
1117     }
1118 #else
1119     ASSERT(from == 0 && to == 1);
1120     gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
1121     gc_threads[0] = gct;
1122     new_gc_thread(0,gc_threads[0]);
1123 #endif
1124 }
1125 
1126 void
1127 freeGcThreads (void)
1128 {
1129     uint32_t g;
1130     if (gc_threads != NULL) {
1131 #if defined(THREADED_RTS)
1132         uint32_t i;
1133         for (i = 0; i < n_capabilities; i++) {
1134             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
1135             {
1136                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
1137             }
1138             stgFree (gc_threads[i]);
1139         }
1140         stgFree (gc_threads);
1141 #else
1142         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
1143         {
1144             freeWSDeque(gc_threads[0]->gens[g].todo_q);
1145         }
1146         stgFree (gc_threads);
1147 #endif
1148         gc_threads = NULL;
1149     }
1150 }
1151 
1152 /* ----------------------------------------------------------------------------
1153    Start GC threads
1154    ------------------------------------------------------------------------- */
1155 
1156 static volatile StgWord gc_running_threads;
1157 
1158 static StgWord
1159 inc_running (void)
1160 {
1161     StgWord new;
1162     new = atomic_inc(&gc_running_threads, 1);
1163     ASSERT(new <= n_gc_threads);
1164     return new;
1165 }
1166 
1167 static StgWord
1168 dec_running (void)
1169 {
1170     ASSERT(RELAXED_LOAD(&gc_running_threads) != 0);
1171     return atomic_dec(&gc_running_threads);
1172 }
1173 
1174 static bool
1175 any_work (void)
1176 {
1177     int g;
1178     gen_workspace *ws;
1179 
1180     NONATOMIC_ADD(&gct->any_work, 1);
1181 
1182     write_barrier();
1183 
1184     // scavenge objects in compacted generation
1185     if (mark_stack_bd != NULL && !mark_stack_empty()) {
1186         return true;
1187     }
1188 
1189     // Check for global work in any gen.  We don't need to check for
1190     // local work, because we have already exited scavenge_loop(),
1191     // which means there is no local work for this thread.
1192     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
1193         ws = &gct->gens[g];
1194         if (ws->todo_large_objects) return true;
1195         if (!looksEmptyWSDeque(ws->todo_q)) return true;
1196         if (ws->todo_overflow) return true;
1197     }
1198 
1199 #if defined(THREADED_RTS)
1200     if (work_stealing) {
1201         uint32_t n;
1202         // look for work to steal
1203         for (n = 0; n < n_gc_threads; n++) {
1204             if (n == gct->thread_index) continue;
1205             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
1206                 ws = &gc_threads[n]->gens[g];
1207                 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1208             }
1209         }
1210     }
1211 #endif
1212 
1213     __atomic_fetch_add(&gct->no_work, 1, __ATOMIC_RELAXED);
1214 #if defined(THREADED_RTS)
1215     yieldThread();
1216 #endif
1217 
1218     return false;
1219 }
1220 
1221 static void
1222 scavenge_until_all_done (void)
1223 {
1224     DEBUG_ONLY( uint32_t r );
1225 
1226 
1227 loop:
1228 #if defined(THREADED_RTS)
1229     if (n_gc_threads > 1) {
1230         scavenge_loop();
1231     } else {
1232         scavenge_loop1();
1233     }
1234 #else
1235     scavenge_loop();
1236 #endif
1237 
1238     collect_gct_blocks();
1239 
1240     // scavenge_loop() only exits when there's no work to do
1241 
1242     // This atomic decrement also serves as a full barrier to ensure that any
1243     // writes we made during scavenging are visible to other threads.
1244 #if defined(DEBUG)
1245     r = dec_running();
1246 #else
1247     dec_running();
1248 #endif
1249 
1250     traceEventGcIdle(gct->cap);
1251 
1252     debugTrace(DEBUG_gc, "%d GC threads still running", r);
1253 
1254     while (SEQ_CST_LOAD(&gc_running_threads) != 0) {
1255         // usleep(1);
1256         if (any_work()) {
1257             inc_running();
1258             traceEventGcWork(gct->cap);
1259             goto loop;
1260         }
1261         // any_work() does not remove the work from the queue, it
1262         // just checks for the presence of work.  If we find any,
1263         // then we increment gc_running_threads and go back to
1264         // scavenge_loop() to perform any pending work.
1265     }
1266 
1267     traceEventGcDone(gct->cap);
1268 }
1269 
1270 #if defined(THREADED_RTS)
1271 
1272 void
1273 gcWorkerThread (Capability *cap)
1274 {
1275     gc_thread *saved_gct;
1276 
1277     // necessary if we stole a callee-saves register for gct:
1278     saved_gct = gct;
1279 
1280     SET_GCT(gc_threads[cap->no]);
1281     gct->id = osThreadId();
1282     stat_startGCWorker (cap, gct);
1283 
1284     // Wait until we're told to wake up
1285     RELEASE_SPIN_LOCK(&gct->mut_spin);
1286     // yieldThread();
1287     //    Strangely, adding a yieldThread() here makes the CPU time
1288     //    measurements more accurate on Linux, perhaps because it syncs
1289     //    the CPU time across the multiple cores.  Without this, CPU time
1290     //    is heavily skewed towards GC rather than MUT.
1291     SEQ_CST_STORE(&gct->wakeup, GC_THREAD_STANDING_BY);
1292     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1293     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1294 
1295     init_gc_thread(gct);
1296 
1297     traceEventGcWork(gct->cap);
1298 
1299     // Every thread evacuates some roots.
1300     gct->evac_gen_no = 0;
1301     markCapability(mark_root, gct, cap, true/*prune sparks*/);
1302     scavenge_capability_mut_lists(cap);
1303 
1304     scavenge_until_all_done();
1305 
1306 #if defined(THREADED_RTS)
1307     // Now that the whole heap is marked, we discard any sparks that
1308     // were found to be unreachable.  The main GC thread is currently
1309     // marking heap reachable via weak pointers, so it is
1310     // non-deterministic whether a spark will be retained if it is
1311     // only reachable via weak pointers.  To fix this problem would
1312     // require another GC barrier, which is too high a price.
1313     pruneSparkQueue(false, cap);
1314 #endif
1315 
1316     // Wait until we're told to continue
1317     RELEASE_SPIN_LOCK(&gct->gc_spin);
1318     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1319                gct->thread_index);
1320     stat_endGCWorker (cap, gct);
1321     // This must come *after* stat_endGCWorker since it serves to
1322     // synchronize us with the GC leader, which will later aggregate the
1323     // GC statistics.
1324     SEQ_CST_STORE(&gct->wakeup, GC_THREAD_WAITING_TO_CONTINUE);
1325     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1326     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1327 
1328     SET_GCT(saved_gct);
1329 }
1330 
1331 #endif
1332 
1333 #if defined(THREADED_RTS)
1334 
1335 void
1336 waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1337 {
1338     const uint32_t n_threads = n_capabilities;
1339     const uint32_t me = cap->no;
1340     uint32_t i, j;
1341     bool retry = true;
1342     Time t0, t1, t2;
1343 
1344     t0 = t1 = t2 = getProcessElapsedTime();
1345 
1346     while(retry) {
1347         for (i=0; i < n_threads; i++) {
1348             if (i == me || idle_cap[i]) continue;
1349             if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
1350                 prodCapability(capabilities[i], cap->running_task);
1351             }
1352         }
1353         for (j=0; j < 10; j++) {
1354             retry = false;
1355             for (i=0; i < n_threads; i++) {
1356                 if (i == me || idle_cap[i]) continue;
1357                 write_barrier();
1358                 interruptCapability(capabilities[i]);
1359                 if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
1360                     retry = true;
1361                 }
1362             }
1363             if (!retry) break;
1364 #if defined(PROF_SPIN)
1365             waitForGcThreads_yield++;
1366 #endif
1367             yieldThread();
1368         }
1369 
1370         t2 = getProcessElapsedTime();
1371         if (RtsFlags.GcFlags.longGCSync != 0 &&
1372             t2 - t1 > RtsFlags.GcFlags.longGCSync) {
1373             /* call this every longGCSync of delay */
1374             rtsConfig.longGCSync(cap->no, t2 - t0);
1375             t1 = t2;
1376         }
1377         if (retry) {
1378 #if defined(PROF_SPIN)
1379             // This is a bit strange, we'll get more yields than spins.
1380             // I guess that means it's not a spin-lock at all, but these
1381             // numbers are still useful (I think).
1382             waitForGcThreads_spin++;
1383 #endif
1384         }
1385     }
1386 
1387     if (RtsFlags.GcFlags.longGCSync != 0 &&
1388         t2 - t0 > RtsFlags.GcFlags.longGCSync) {
1389         rtsConfig.longGCSyncEnd(t2 - t0);
1390     }
1391 }
1392 
1393 #endif // THREADED_RTS
1394 
1395 static void
1396 start_gc_threads (void)
1397 {
1398 #if defined(THREADED_RTS)
1399     gc_running_threads = 0;
1400 #endif
1401 }
1402 
1403 static void
1404 wakeup_gc_threads (uint32_t me USED_IF_THREADS,
1405                    bool idle_cap[] USED_IF_THREADS)
1406 {
1407 #if defined(THREADED_RTS)
1408     uint32_t i;
1409 
1410     if (n_gc_threads == 1) return;
1411 
1412     for (i=0; i < n_gc_threads; i++) {
1413         if (i == me || idle_cap[i]) continue;
1414         inc_running();
1415         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1416         if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY)
1417             barf("wakeup_gc_threads");
1418 
1419         SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_RUNNING);
1420         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1421         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1422     }
1423 #endif
1424 }
1425 
1426 // After GC is complete, we must wait for all GC threads to enter the
1427 // standby state, otherwise they may still be executing inside
1428 // any_work(), and may even remain awake until the next GC starts.
1429 static void
1430 shutdown_gc_threads (uint32_t me USED_IF_THREADS,
1431                      bool idle_cap[] USED_IF_THREADS)
1432 {
1433 #if defined(THREADED_RTS)
1434     uint32_t i;
1435 
1436     if (n_gc_threads == 1) return;
1437 
1438     for (i=0; i < n_gc_threads; i++) {
1439         if (i == me || idle_cap[i]) continue;
1440         while (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE) {
1441             busy_wait_nop();
1442         }
1443     }
1444 #endif
1445 }
1446 
1447 #if defined(THREADED_RTS)
1448 void
1449 releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1450 {
1451     const uint32_t n_threads = n_capabilities;
1452     const uint32_t me = cap->no;
1453     uint32_t i;
1454     for (i=0; i < n_threads; i++) {
1455         if (i == me || idle_cap[i]) continue;
1456         if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE)
1457             barf("releaseGCThreads");
1458 
1459         SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_INACTIVE);
1460         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1461         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1462     }
1463 }
1464 #endif
1465 
1466 /* ----------------------------------------------------------------------------
1467    Save the mutable lists in saved_mut_lists where they will be scavenged
1468    during GC
1469    ------------------------------------------------------------------------- */
1470 
1471 static void
1472 stash_mut_list (Capability *cap, uint32_t gen_no)
1473 {
1474     cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1475     RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));
1476 }
1477 
1478 /* ----------------------------------------------------------------------------
1479    Initialise a generation that is to be collected
1480    ------------------------------------------------------------------------- */
1481 
1482 static void
1483 prepare_collected_gen (generation *gen)
1484 {
1485     uint32_t i, g, n;
1486     gen_workspace *ws;
1487     bdescr *bd, *next;
1488 
1489     g = gen->no;
1490 
1491     if (RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no) {
1492         // Nonmoving heap's mutable list is always a root.
1493         for (i = 0; i < n_capabilities; i++) {
1494             stash_mut_list(capabilities[i], g);
1495         }
1496     } else if (g != 0) {
1497         // Otherwise throw away the current mutable list. Invariant: the
1498         // mutable list always has at least one block; this means we can avoid
1499         // a check for NULL in recordMutable().
1500         for (i = 0; i < n_capabilities; i++) {
1501             bdescr *old = RELAXED_LOAD(&capabilities[i]->mut_lists[g]);
1502             freeChain(old);
1503 
1504             bdescr *new = allocBlockOnNode(capNoToNumaNode(i));
1505             RELAXED_STORE(&capabilities[i]->mut_lists[g], new);
1506         }
1507     }
1508 
1509     gen = &generations[g];
1510     ASSERT(gen->no == g);
1511 
1512     // we'll construct a new list of threads in this step
1513     // during GC, throw away the current list.
1514     gen->old_threads = gen->threads;
1515     gen->threads = END_TSO_QUEUE;

    // deprecate the existing blocks (except in the case of the nonmoving
    // collector since these will be preserved in nonmovingCollect for the
    // concurrent GC).
    if (!(RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no)) {
        gen->old_blocks   = gen->blocks;
        gen->n_old_blocks = gen->n_blocks;
        gen->blocks       = NULL;
        gen->n_blocks     = 0;
        gen->n_words      = 0;
        gen->live_estimate = 0;
    }

    // initialise the large object queues.
    ASSERT(gen->scavenged_large_objects == NULL);
    ASSERT(gen->n_scavenged_large_blocks == 0);
    ASSERT(gen->live_compact_objects == NULL);
    ASSERT(gen->n_live_compact_blocks == 0);

    // grab all the partial blocks stashed in the gc_thread workspaces and
    // move them to the old_blocks list of this gen.
    for (n = 0; n < n_capabilities; n++) {
        ws = &gc_threads[n]->gens[gen->no];

        for (bd = ws->part_list; bd != NULL; bd = next) {
            next = bd->link;
            bd->link = gen->old_blocks;
            gen->old_blocks = bd;
            gen->n_old_blocks += bd->blocks;
        }
        ws->part_list = NULL;
        ws->n_part_blocks = 0;
        ws->n_part_words = 0;

        ASSERT(ws->scavd_list == NULL);
        ASSERT(ws->n_scavd_blocks == 0);
        ASSERT(ws->n_scavd_words == 0);

        if (ws->todo_free != ws->todo_bd->start) {
            ws->todo_bd->free = ws->todo_free;
            ws->todo_bd->link = gen->old_blocks;
            gen->old_blocks = ws->todo_bd;
            gen->n_old_blocks += ws->todo_bd->blocks;
            alloc_todo_block(ws,0); // always has one block.
        }
    }

    // mark the small objects as from-space
    for (bd = gen->old_blocks; bd; bd = bd->link) {
        bd->flags &= ~BF_EVACUATED;
    }

    // mark the large objects as from-space
    for (bd = gen->large_objects; bd; bd = bd->link) {
        bd->flags &= ~BF_EVACUATED;
    }

    // mark the compact objects as from-space
    for (bd = gen->compact_objects; bd; bd = bd->link) {
        bd->flags &= ~BF_EVACUATED;
    }

    // for a compacted generation, we need to allocate the bitmap
    if (gen->mark) {
        StgWord bitmap_size; // in bytes
        bdescr *bitmap_bdescr;
        StgWord *bitmap;

        bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
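        // The mark bitmap needs one bit per word of from-space.  Illustrative
        // figures (not enforced by this code): with the usual 4096-byte
        // blocks and 64-bit words, a block holds 512 words, so each block
        // needs 512 bits = 64 bytes of bitmap, i.e. BLOCK_SIZE / BITS_IN(W_).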

        if (bitmap_size > 0) {
            bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
                                       / BLOCK_SIZE);
            gen->bitmap = bitmap_bdescr;
            bitmap = bitmap_bdescr->start;

            debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
                       bitmap_size, bitmap);

            // don't forget to fill it with zeros!
            memset(bitmap, 0, bitmap_size);

            // For each block in this generation, point to its bitmap from the
            // block descriptor.
            for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
                bd->u.bitmap = bitmap;
                bitmap += BLOCK_SIZE_W / BITS_IN(W_);

                // Also at this point we set the BF_MARKED flag
                // for this block.  The invariant is that
                // BF_MARKED is always unset, except during GC
                // when it is set on those blocks which will be
                // compacted.
                if (!(bd->flags & BF_FRAGMENTED)) {
                    bd->flags |= BF_MARKED;
                }

                // BF_SWEPT should be marked only for blocks that are being
                // collected in sweep()
                bd->flags &= ~BF_SWEPT;
            }
        }
    }
}

/* ----------------------------------------------------------------------------
   Initialise a generation that is *not* to be collected
   ------------------------------------------------------------------------- */

static void
prepare_uncollected_gen (generation *gen)
{
    uint32_t i;

    ASSERT(gen->no > 0);

    // save the current mutable lists for this generation, and
    // allocate a fresh block for each one.  We'll traverse these
    // mutable lists as roots early on in the GC.
    for (i = 0; i < n_capabilities; i++) {
        stash_mut_list(capabilities[i], gen->no);
    }

    ASSERT(gen->scavenged_large_objects == NULL);
    ASSERT(gen->n_scavenged_large_blocks == 0);
}

/* -----------------------------------------------------------------------------
   Collect the completed blocks from a GC thread and attach them to
   the generation.
   -------------------------------------------------------------------------- */

static void
collect_gct_blocks (void)
{
    uint32_t g;
    gen_workspace *ws;
    bdescr *bd, *prev;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        ws = &gct->gens[g];

        // there may still be a block attached to ws->todo_bd;
        // leave it there to use next time.

        if (ws->scavd_list != NULL) {
            ACQUIRE_SPIN_LOCK(&ws->gen->sync);

            ASSERT(gct->scan_bd == NULL);
            ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);

            prev = NULL;
            for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
                prev = bd;
            }
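            // prev now points at the last block of scavd_list; splice the
            // whole list onto the front of ws->gen->blocks below.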
            if (prev != NULL) {
                prev->link = ws->gen->blocks;
                ws->gen->blocks = ws->scavd_list;
            }
            ws->gen->n_blocks += ws->n_scavd_blocks;
            ws->gen->n_words += ws->n_scavd_words;

            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;
            ws->n_scavd_words = 0;

            RELEASE_SPIN_LOCK(&ws->gen->sync);
        }
    }
}

/* -----------------------------------------------------------------------------
   During mutation, any blocks that are filled by allocatePinned() are stashed
   on the local pinned_object_blocks list, to avoid needing to take a global
   lock.  Here we collect those blocks from the cap->pinned_object_blocks lists
   and put them on g0->large_objects or oldest_gen->large_objects.

   How do we decide which list to put them on?

   - When the non-moving heap is enabled and this is a major GC, we put them on
     oldest_gen.  This is because after preparation we want there to be no
     old-to-young references, and we want to be able to reset the mut_lists.
     For this we need to promote every potentially live object to the oldest
     generation.

   - Otherwise we put them on g0.
   -------------------------------------------------------------------------- */

static void
collect_pinned_object_blocks (void)
{
    const bool use_nonmoving = RtsFlags.GcFlags.useNonmoving;
    generation *const gen = (use_nonmoving && major_gc) ? oldest_gen : g0;

    for (uint32_t n = 0; n < n_capabilities; n++) {
        bdescr *last = NULL;
        if (use_nonmoving && gen == oldest_gen) {
            // Mark objects as belonging to the nonmoving heap
            for (bdescr *bd = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks); bd != NULL; bd = bd->link) {
                bd->flags |= BF_NONMOVING;
                bd->gen = oldest_gen;
                bd->gen_no = oldest_gen->no;
                oldest_gen->n_large_words += bd->free - bd->start;
                oldest_gen->n_large_blocks += bd->blocks;
                last = bd;
            }
        } else {
            for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
                last = bd;
            }
        }

        if (last != NULL) {
            last->link = gen->large_objects;
            if (gen->large_objects != NULL) {
                gen->large_objects->u.back = last;
            }
            gen->large_objects = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks);
            RELAXED_STORE(&capabilities[n]->pinned_object_blocks, NULL);
        }
    }
}

/* -----------------------------------------------------------------------------
   Initialise a gc_thread before GC
   -------------------------------------------------------------------------- */

static void
init_gc_thread (gc_thread *t)
{
    t->static_objects = END_OF_STATIC_OBJECT_LIST;
    t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
    t->scan_bd = NULL;
    t->mut_lists = t->cap->mut_lists;
    t->evac_gen_no = 0;
    t->failed_to_evac = false;
    t->eager_promotion = true;
    t->thunk_selector_depth = 0;
    t->copied = 0;
    t->scanned = 0;
    t->any_work = 0;
    t->no_work = 0;
    t->scav_find_work = 0;
}

/* -----------------------------------------------------------------------------
   Function we pass to evacuate roots.
   -------------------------------------------------------------------------- */

static void
mark_root(void *user USED_IF_THREADS, StgClosure **root)
{
    // we stole a register for gct, but this function is called from
    // *outside* the GC where the register variable is not in effect,
    // so we need to save and restore it here.  NB. only call
    // mark_root() from the main GC thread, otherwise gct will be
    // incorrect.
#if defined(THREADED_RTS)
    gc_thread *saved_gct;
    saved_gct = gct;
#endif
    SET_GCT(user);

    evacuate(root);

    SET_GCT(saved_gct);
}

/* ----------------------------------------------------------------------------
   Reset the sizes of the older generations when we do a major
   collection.

   CURRENT STRATEGY: make all generations except zero the same size.
   We have to stay within the maximum heap size, and leave a certain
   percentage of the maximum heap size available to allocate into.
   ------------------------------------------------------------------------- */

void
resizeGenerations (void)
{
    uint32_t g;
    W_ live, size, min_alloc, words;
    const W_ max  = RtsFlags.GcFlags.maxHeapSize;
    const W_ gens = RtsFlags.GcFlags.generations;

    // live in the oldest generations
    if (oldest_gen->live_estimate != 0) {
        words = oldest_gen->live_estimate;
    } else {
        words = oldest_gen->n_words;
    }
    live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
        oldest_gen->n_large_blocks +
        oldest_gen->n_compact_blocks;

    // default max size for all generations except zero
    size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
                   RtsFlags.GcFlags.minOldGenSize);

    if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
        if (max > 0) {
            RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
        } else {
            RtsFlags.GcFlags.heapSizeSuggestion = size;
        }
    }

    // minimum size for generation zero
    min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
                        RtsFlags.GcFlags.minAllocAreaSize
                        * (W_)n_capabilities);
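    // Note: pcFreeHeap is a percentage, so (pcFreeHeap * max) / 200 is half
    // of that fraction of the maximum heap size; the allocation area is at
    // least that, or the minimum allocation area times the number of
    // capabilities, whichever is larger.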

    // Auto-enable compaction when the residency reaches a
    // certain percentage of the maximum heap size (default: 30%).
    // Except when non-moving GC is enabled.
    if (!RtsFlags.GcFlags.useNonmoving &&
        (RtsFlags.GcFlags.compact ||
         (max > 0 &&
          oldest_gen->n_blocks >
          (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
        oldest_gen->mark = 1;
        oldest_gen->compact = 1;
//        debugBelch("compaction: on\n", live);
    } else {
        oldest_gen->mark = 0;
        oldest_gen->compact = 0;
//        debugBelch("compaction: off\n", live);
    }

    if (RtsFlags.GcFlags.sweep) {
        oldest_gen->mark = 1;
    }

    // if we're going to go over the maximum heap size, reduce the
    // size of the generations accordingly.  The calculation is
    // different if compaction is turned on, because we don't need
    // to double the space required to collect the old generation.
    if (max != 0) {

        // this test is necessary to ensure that the calculations
        // below don't have any negative results - we're working
        // with unsigned values here.
        if (max < min_alloc) {
            heapOverflow();
        }

        if (oldest_gen->compact) {
            if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
                size = (max - min_alloc) / ((gens - 1) * 2 - 1);
            }
        } else {
            if ( (size * (gens - 1) * 2) + min_alloc > max ) {
                size = (max - min_alloc) / ((gens - 1) * 2);
            }
        }
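        // Illustration (not enforced by this code): with the default two
        // generations and no compaction, copying the old generation needs
        // from-space plus to-space, so 2*size + min_alloc must fit within
        // max, giving size = (max - min_alloc) / 2.  With compaction the old
        // generation is collected in place, so roughly size + min_alloc is
        // enough and the divisor drops to 1.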

        if (size < live) {
            heapOverflow();
        }
    }

#if 0
    debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
               min_alloc, size, max);
    debugBelch("resize_gen: n_blocks: %lu, n_large_block: %lu, n_compact_blocks: %lu\n",
               oldest_gen->n_blocks, oldest_gen->n_large_blocks, oldest_gen->n_compact_blocks);
    debugBelch("resize_gen: max_blocks: %lu -> %lu\n", oldest_gen->max_blocks, oldest_gen->n_blocks);
#endif

    for (g = 0; g < gens; g++) {
        generations[g].max_blocks = size;
    }
}

/* -----------------------------------------------------------------------------
   Calculate the new size of the nursery, and resize it.
   -------------------------------------------------------------------------- */

static void
resize_nursery (void)
{
    const StgWord min_nursery =
      RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;

    if (RtsFlags.GcFlags.generations == 1)
    {   // Two-space collector:
        W_ blocks;

        /* set up a new nursery.  Allocate a nursery size based on a
         * function of the amount of live data (by default a factor of 2)
         * Use the blocks from the old nursery if possible, freeing up any
         * left over blocks.
         *
         * If we get near the maximum heap size, then adjust our nursery
         * size accordingly.  If the nursery is the same size as the live
         * data (L), then we need 3L bytes.  We can reduce the size of the
         * nursery to bring the required memory down near 2L bytes.
         *
         * A normal 2-space collector would need 4L bytes to give the same
         * performance we get from 3L bytes, reducing to the same
         * performance at 2L bytes.
         */
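        /* One way to read the 3L figure (illustrative only): roughly L for
         * the live data, L for a nursery of the same size, and up to another
         * L to copy the live data into during collection.  Shrinking the
         * nursery brings the total down towards 2L.
         */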
        blocks = generations[0].n_blocks;

        if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
             blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
             RtsFlags.GcFlags.maxHeapSize )
        {
            long adjusted_blocks;  // signed on purpose
            int pc_free;

            adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
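            // We need roughly 2 * blocks to be able to copy the live data at
            // the next collection, so whatever is left of the maximum heap
            // size is the most we can afford to hand out as nursery.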

            debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
                       RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);

            pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
            if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
            {
                heapOverflow();
            }
            blocks = adjusted_blocks;
        }
        else
        {
            blocks *= RtsFlags.GcFlags.oldGenFactor;
            if (blocks < min_nursery)
            {
                blocks = min_nursery;
            }
        }
        resizeNurseries(blocks);
    }
    else  // Generational collector
    {
        /*
         * If the user has given us a suggested heap size, adjust our
         * allocation area to make best use of the memory available.
         */
        if (RtsFlags.GcFlags.heapSizeSuggestion)
        {
            long blocks;
            StgWord needed;

            calcNeeded(false, &needed); // approx blocks needed at next GC

            /* Guess how much will be live in generation 0 next time.
             * A good approximation is obtained by finding the
             * percentage of g0 that was live at the last minor GC.
             *
             * We have an accurate figure for the amount of copied data in
             * 'copied', but we must convert this to a number of blocks, with
             * a small adjustment for estimated slop at the end of a block
             * (- 10 words).
             */
            if (N == 0)
            {
                g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
                    / countNurseryBlocks();
            }

            /* Estimate a size for the allocation area based on the
             * information available.  We might end up going slightly under
             * or over the suggested heap size, but we should be pretty
             * close on average.
             *
             * Formula:            suggested - needed
             *                ----------------------------
             *                    1 + g0_pcnt_kept/100
             *
             * where 'needed' is the amount of memory needed at the next
             * collection for collecting all gens except g0.
             */
            blocks =
                (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
                (100 + (long)g0_pcnt_kept);
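            /* Worked example with made-up numbers: if the suggested heap is
             * 100000 blocks, 'needed' is 20000 blocks and g0_pcnt_kept is 25,
             * then blocks = (80000 * 100) / 125 = 64000.  At the next GC we
             * then expect roughly 64000 (nursery) + 16000 (the 25% kept) +
             * 20000 (other generations) = 100000 blocks, i.e. about the
             * suggestion.
             */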

            if (blocks < (long)min_nursery) {
                blocks = min_nursery;
            }

            resizeNurseries((W_)blocks);
        }
        else
        {
            // we might have added extra blocks to the nursery, so
            // resize back to the original size again.
            resizeNurseriesFixed();
        }
    }
}

/* -----------------------------------------------------------------------------
   Sanity code for CAF garbage collection.

   With DEBUG turned on, we manage a CAF list in addition to the SRT
   mechanism.  After GC, we run down the CAF list and turn any CAFs that
   have been garbage collected into GCD_CAF closures.  This means we get an
   error whenever the program tries to enter a garbage-collected CAF.

   Any garbage-collected CAFs are removed from the CAF list at the same
   time.
   -------------------------------------------------------------------------- */

#if defined(DEBUG)

void gcCAFs(void)
{
    uint32_t i = 0;
    StgIndStatic *prev = NULL;

    for (StgIndStatic *p = debug_caf_list;
         p != (StgIndStatic*) END_OF_CAF_LIST;
         p = (StgIndStatic*) p->saved_info)
    {
        const StgInfoTable *info = get_itbl((StgClosure*)p);
        ASSERT(info->type == IND_STATIC);

        // See Note [STATIC_LINK fields] in Storage.h.
        // This condition identifies CAFs that have just been GC'd; CAFs whose
        // static_link field is 3 should be ignored.
        if ((((StgWord)(p->static_link)&STATIC_BITS) | prev_static_flag) != 3) {
            debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
            SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
            if (prev == NULL) {
                debug_caf_list = (StgIndStatic*)p->saved_info;
            } else {
                prev->saved_info = p->saved_info;
            }
        } else {
            prev = p;
            i++;
        }
    }

    debugTrace(DEBUG_gccafs, "%d CAFs live", i);
}
#endif


/* -----------------------------------------------------------------------------
   The GC can leave some work for the mutator to do before the next
   GC, provided the work can be safely overlapped with mutation.  This
   can help reduce the GC pause time.

   The mutator can call doIdleGCWork() any time it likes, but
   preferably when it is idle.  It's safe for multiple capabilities to
   call doIdleGCWork().

   When 'all' is
     * false: doIdleGCWork() should only take a short, bounded, amount
       of time.
     * true: doIdleGCWork() will complete all the outstanding GC work.

   The return value is
     * true if there's more to do (only if 'all' is false).
     * false otherwise.
  -------------------------------------------------------------------------- */

bool doIdleGCWork(Capability *cap STG_UNUSED, bool all)
{
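    // At present the only work deferred to the mutator here is running
    // pending finalizers; see runSomeFinalizers().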
    return runSomeFinalizers(all);
}