/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * Generational garbage collector
 *
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
 *
 *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage/gc
 *
 * ---------------------------------------------------------------------------*/

#pragma once

#include "WSDeque.h"
#include "GetTime.h" // for Ticks

#include "BeginPrivate.h"

/* -----------------------------------------------------------------------------
   General scheme

   ToDo: move this to the wiki when the implementation is done.

   We're only going to try to parallelise the copying GC for now.  The
   plan is as follows.

   Each thread has a gc_thread structure (see below) which holds its
   thread-local data.  We'll keep a pointer to this in a thread-local
   variable, or possibly in a register.

   In the gc_thread structure is a gen_workspace for each generation.  The
   primary purpose of the gen_workspace is to hold evacuated objects;
   when an object is evacuated, it is copied to the "todo" block in
   the thread's workspace for the appropriate generation.  When the todo
   block is full, it is pushed to the global gen->todos list, which
   is protected by a lock.  (In fact we interpose a one-place buffer
   here to reduce contention.)

   A thread repeatedly grabs a block of work from one of the
   gen->todos lists, scavenges it, and keeps the scavenged block on
   its own ws->scavd_list (this is to avoid unnecessary contention
   returning the completed buffers back to the generation: we can just
   collect them all later).

   When there is no global work to do, we start scavenging the todo
   blocks in the workspaces.
   This is where the scan_bd field comes
   in: we can scan the contents of the todo block.  When we have
   scavenged the contents of the todo block (up to todo_bd->free), we
   don't want to move this block immediately to the scavd_list,
   because it is probably only partially full.  So we remember that we
   have scanned up to this point by saving the block in ws->scan_bd,
   with the current scan pointer in ws->scan.  Later, when more
   objects have been copied to this block, we can come back and scan
   the rest.  When we visit this workspace again in the future,
   scan_bd may still be the same as todo_bd, or it might be different:
   if enough objects were copied into this block that it filled up,
   then we will have allocated a new todo block, but *not* pushed the
   old one to the generation, because it is partially scanned.

   The reason to leave scanning the todo blocks until last is that we
   want to deal with full blocks as far as possible.
   ------------------------------------------------------------------------- */


/* -----------------------------------------------------------------------------
   Generation Workspace

   A generation workspace exists for each generation for each GC
   thread.  The GC thread takes a block from the todos list of the
   generation into the scan_bd and then scans it.  Objects referred to
   by those in the scan block are copied into the todo or scavd blocks
   of the relevant generation.
75 76 ------------------------------------------------------------------------- */ 77 78 typedef struct gen_workspace_ { 79 generation * gen; // the gen for this workspace 80 struct gc_thread_ * my_gct; // the gc_thread that contains this workspace 81 82 // where objects to be scavenged go 83 bdescr * todo_bd; 84 StgPtr todo_free; // free ptr for todo_bd 85 StgPtr todo_lim; // lim for todo_bd 86 struct NonmovingSegment *todo_seg; // only available for oldest gen workspace 87 88 WSDeque * todo_q; 89 bdescr * todo_overflow; 90 uint32_t n_todo_overflow; 91 92 // where large objects to be scavenged go 93 bdescr * todo_large_objects; 94 95 // Objects that have already been scavenged. 96 bdescr * scavd_list; 97 StgWord n_scavd_blocks; // count of blocks in this list 98 StgWord n_scavd_words; 99 100 // Partially-full, scavenged, blocks 101 bdescr * part_list; 102 StgWord n_part_blocks; // count of above 103 StgWord n_part_words; 104 } gen_workspace ATTRIBUTE_ALIGNED(64); 105 // align so that computing gct->gens[n] is a shift, not a multiply 106 // fails if the size is <64, which is why we need the pad above 107 108 /* ---------------------------------------------------------------------------- 109 GC thread object 110 111 Every GC thread has one of these. It contains all the generation 112 specific workspaces and other GC thread local information. 
At some 113 later point it maybe useful to move this other into the TLS store 114 of the GC threads 115 ------------------------------------------------------------------------- */ 116 117 /* values for the wakeup field */ 118 #define GC_THREAD_INACTIVE 0 119 #define GC_THREAD_STANDING_BY 1 120 #define GC_THREAD_RUNNING 2 121 #define GC_THREAD_WAITING_TO_CONTINUE 3 122 123 typedef struct gc_thread_ { 124 Capability *cap; 125 126 #if defined(THREADED_RTS) 127 OSThreadId id; // The OS thread that this struct belongs to 128 SpinLock gc_spin; 129 SpinLock mut_spin; 130 volatile StgWord wakeup; // NB not StgWord8; only StgWord is guaranteed atomic 131 #endif 132 uint32_t thread_index; // a zero based index identifying the thread 133 134 bdescr * free_blocks; // a buffer of free blocks for this thread 135 // during GC without accessing the block 136 // allocators spin lock. 137 138 // These two lists are chained through the STATIC_LINK() fields of static 139 // objects. Pointers are tagged with the current static_flag, so before 140 // following a pointer, untag it with UNTAG_STATIC_LIST_PTR(). 141 StgClosure* static_objects; // live static objects 142 StgClosure* scavenged_static_objects; // static objects scavenged so far 143 144 W_ gc_count; // number of GCs this thread has done 145 146 // block that is currently being scanned 147 bdescr * scan_bd; 148 149 // Remembered sets on this CPU. Each GC thread has its own 150 // private per-generation remembered sets, so it can add an item 151 // to the remembered set without taking a lock. The mut_lists 152 // array on a gc_thread is the same as the one on the 153 // corresponding Capability; we stash it here too for easy access 154 // during GC; see recordMutableGen_GC(). 155 bdescr ** mut_lists; 156 157 // -------------------- 158 // evacuate flags 159 160 uint32_t evac_gen_no; // Youngest generation that objects 161 // should be evacuated to in 162 // evacuate(). 
(Logically an 163 // argument to evacuate, but it's 164 // static a lot of the time so we 165 // optimise it into a per-thread 166 // variable). 167 168 bool failed_to_evac; // failure to evacuate an object typically 169 // Causes it to be recorded in the mutable 170 // object list 171 172 bool eager_promotion; // forces promotion to the evac gen 173 // instead of the to-space 174 // corresponding to the object 175 176 W_ thunk_selector_depth; // used to avoid unbounded recursion in 177 // evacuate() for THUNK_SELECTOR 178 179 // ------------------- 180 // stats 181 182 W_ copied; 183 W_ scanned; 184 W_ any_work; 185 W_ no_work; 186 W_ scav_find_work; 187 188 Time gc_start_cpu; // thread CPU time 189 Time gc_end_cpu; // thread CPU time 190 Time gc_sync_start_elapsed; // start of GC sync 191 Time gc_start_elapsed; // process elapsed time 192 Time gc_end_elapsed; // process elapsed time 193 W_ gc_start_faults; 194 195 // ------------------- 196 // workspaces 197 198 // array of workspaces, indexed by gen->abs_no. This is placed 199 // directly at the end of the gc_thread structure so that we can get from 200 // the gc_thread pointer to a workspace using only pointer 201 // arithmetic, no memory access. This happens in the inner loop 202 // of the GC, see Evac.c:alloc_for_copy(). 203 gen_workspace gens[]; 204 } gc_thread; 205 206 207 extern uint32_t n_gc_threads; 208 209 extern gc_thread **gc_threads; 210 211 #if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) 212 extern ThreadLocalKey gctKey; 213 #endif 214 215 #include "EndPrivate.h" 216