1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team 1998-2008
4  *
5  * Generational garbage collector
6  *
7  * Documentation on the architecture of the Garbage Collector can be
8  * found in the online commentary:
9  *
10  *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage/gc
11  *
12  * ---------------------------------------------------------------------------*/
13 
14 #pragma once
15 
16 #include "WSDeque.h"
17 #include "GetTime.h" // for Ticks
18 
19 #include "BeginPrivate.h"
20 
21 /* -----------------------------------------------------------------------------
22    General scheme
23 
24    ToDo: move this to the wiki when the implementation is done.
25 
26    We're only going to try to parallelise the copying GC for now.  The
27    Plan is as follows.
28 
29    Each thread has a gc_thread structure (see below) which holds its
30    thread-local data.  We'll keep a pointer to this in a thread-local
31    variable, or possibly in a register.
32 
33    In the gc_thread structure is a gen_workspace for each generation.  The
34    primary purpose of the gen_workspace is to hold evacuated objects;
35    when an object is evacuated, it is copied to the "todo" block in
36    the thread's workspace for the appropriate generation.  When the todo
37    block is full, it is pushed to the global gen->todos list, which
   is protected by a lock.  (In fact we interpose a one-place buffer
   here to reduce contention.)
40 
41    A thread repeatedly grabs a block of work from one of the
42    gen->todos lists, scavenges it, and keeps the scavenged block on
43    its own ws->scavd_list (this is to avoid unnecessary contention
44    returning the completed buffers back to the generation: we can just
45    collect them all later).
46 
47    When there is no global work to do, we start scavenging the todo
48    blocks in the workspaces.  This is where the scan_bd field comes
49    in: we can scan the contents of the todo block, when we have
50    scavenged the contents of the todo block (up to todo_bd->free), we
51    don't want to move this block immediately to the scavd_list,
52    because it is probably only partially full.  So we remember that we
53    have scanned up to this point by saving the block in ws->scan_bd,
54    with the current scan pointer in ws->scan.  Later, when more
55    objects have been copied to this block, we can come back and scan
56    the rest.  When we visit this workspace again in the future,
57    scan_bd may still be the same as todo_bd, or it might be different:
58    if enough objects were copied into this block that it filled up,
59    then we will have allocated a new todo block, but *not* pushed the
60    old one to the generation, because it is partially scanned.
61 
62    The reason to leave scanning the todo blocks until last is that we
63    want to deal with full blocks as far as possible.
64    ------------------------------------------------------------------------- */
65 
66 
67 /* -----------------------------------------------------------------------------
68    Generation Workspace
69 
70    A generation workspace exists for each generation for each GC
   thread. The GC thread takes a block from the todos list of the
   generation into scan_bd and then scans it.  Objects referred to
73    by those in the scan block are copied into the todo or scavd blocks
74    of the relevant generation.
75 
76    ------------------------------------------------------------------------- */
77 
/* Per-generation, per-GC-thread workspace.  Evacuated objects are
 * copied into the todo block of the appropriate generation's
 * workspace; see the "General scheme" commentary above. */
typedef struct gen_workspace_ {
    generation * gen;           // the gen for this workspace
    struct gc_thread_ * my_gct; // the gc_thread that contains this workspace

    // where objects to be scavenged go: the current todo block
    bdescr *     todo_bd;
    StgPtr       todo_free;            // free ptr for todo_bd (next copy goes here)
    StgPtr       todo_lim;             // lim for todo_bd (end of usable space)
    struct NonmovingSegment *todo_seg; // only available for oldest gen workspace

    WSDeque *    todo_q;               // work-stealing deque of todo blocks
    bdescr *     todo_overflow;        // NOTE(review): presumably blocks that could
                                       // not be pushed onto todo_q -- confirm in GC.c
    uint32_t     n_todo_overflow;      // count of blocks on todo_overflow

    // where large objects to be scavenged go
    bdescr *     todo_large_objects;

    // Objects that have already been scavenged.
    bdescr *     scavd_list;
    StgWord      n_scavd_blocks;     // count of blocks in this list
    StgWord      n_scavd_words;      // count of words in those blocks

    // Partially-full, scavenged, blocks
    bdescr *     part_list;
    StgWord      n_part_blocks;      // count of above
    StgWord      n_part_words;       // count of words in the above blocks
} gen_workspace ATTRIBUTE_ALIGNED(64);
// Aligned to 64 bytes so that computing gct->gens[n] is a shift, not a
// multiply.  This would fail if sizeof(gen_workspace) were less than 64;
// the alignment attribute pads the struct up to a full 64 bytes.
107 
108 /* ----------------------------------------------------------------------------
109    GC thread object
110 
111    Every GC thread has one of these. It contains all the generation
   specific workspaces and other GC-thread-local information. At some
   later point it may be useful to move this data into the TLS store
   of the GC threads.
115    ------------------------------------------------------------------------- */
116 
/* Values for the gc_thread 'wakeup' field (THREADED_RTS only).
 * NOTE(review): state meanings below are inferred from the names --
 * confirm against the state machine in GC.c. */
#define GC_THREAD_INACTIVE             0   // not taking part in this GC
#define GC_THREAD_STANDING_BY          1   // ready, waiting for GC to start
#define GC_THREAD_RUNNING              2   // actively doing GC work
#define GC_THREAD_WAITING_TO_CONTINUE  3   // done; waiting to be released
122 
/* Per-thread GC state.  Every GC thread has exactly one of these; the
 * array of all of them is gc_threads (declared below). */
typedef struct gc_thread_ {
    Capability *cap;               // the Capability associated with this GC thread

#if defined(THREADED_RTS)
    OSThreadId id;                 // The OS thread that this struct belongs to
    SpinLock   gc_spin;            // NOTE(review): presumably used to park/release
    SpinLock   mut_spin;           //   this thread between GC and mutator phases --
                                   //   confirm against GC.c
    volatile StgWord wakeup;       // one of the GC_THREAD_* values above.
                                   // NB not StgWord8; only StgWord is guaranteed atomic
#endif
    uint32_t thread_index;         // a zero based index identifying the thread

    bdescr * free_blocks;          // a buffer of free blocks for this thread,
                                   // so it can allocate during GC without
                                   // taking the block allocator's spin lock.

    // These two lists are chained through the STATIC_LINK() fields of static
    // objects.  Pointers are tagged with the current static_flag, so before
    // following a pointer, untag it with UNTAG_STATIC_LIST_PTR().
    StgClosure* static_objects;            // live static objects
    StgClosure* scavenged_static_objects;  // static objects scavenged so far

    W_ gc_count;                   // number of GCs this thread has done

    // block that is currently being scanned (see the "General scheme"
    // commentary above for how partially-scanned todo blocks are handled)
    bdescr *     scan_bd;

    // Remembered sets on this CPU.  Each GC thread has its own
    // private per-generation remembered sets, so it can add an item
    // to the remembered set without taking a lock.  The mut_lists
    // array on a gc_thread is the same as the one on the
    // corresponding Capability; we stash it here too for easy access
    // during GC; see recordMutableGen_GC().
    bdescr **    mut_lists;

    // --------------------
    // evacuate flags

    uint32_t evac_gen_no;          // Youngest generation that objects
                                   // should be evacuated to in
                                   // evacuate().  (Logically an
                                   // argument to evacuate, but it's
                                   // static a lot of the time so we
                                   // optimise it into a per-thread
                                   // variable).

    bool failed_to_evac;           // failure to evacuate an object typically
                                   // causes it to be recorded in the mutable
                                   // object list

    bool eager_promotion;          // forces promotion to the evac gen
                                   // instead of the to-space
                                   // corresponding to the object

    W_ thunk_selector_depth;       // used to avoid unbounded recursion in
                                   // evacuate() for THUNK_SELECTOR

    // -------------------
    // stats

    W_ copied;                     // words copied by this thread
    W_ scanned;                    // words scanned by this thread
    W_ any_work;                   // NOTE(review): presumably counters of
    W_ no_work;                    //   work-finding attempts/failures --
    W_ scav_find_work;             //   confirm against GC.c

    Time gc_start_cpu;             // thread CPU time
    Time gc_end_cpu;               // thread CPU time
    Time gc_sync_start_elapsed;    // start of GC sync
    Time gc_start_elapsed;         // process elapsed time
    Time gc_end_elapsed;           // process elapsed time
    W_ gc_start_faults;            // page faults at the start of this GC

    // -------------------
    // workspaces

    // array of workspaces, indexed by gen->abs_no.  This is placed
    // directly at the end of the gc_thread structure so that we can get from
    // the gc_thread pointer to a workspace using only pointer
    // arithmetic, no memory access.  This happens in the inner loop
    // of the GC, see Evac.c:alloc_for_copy().
    gen_workspace gens[];
} gc_thread;
205 
206 
// Number of GC threads.
extern uint32_t n_gc_threads;

// All GC thread structures; presumably indexed by
// gc_thread.thread_index -- confirm against the allocation site.
extern gc_thread **gc_threads;

#if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND)
// Thread-local key for locating the current thread's gc_thread; only
// used for the threaded RTS built with the LLVM backend.
extern ThreadLocalKey gctKey;
#endif
214 
215 #include "EndPrivate.h"
216