1 /*
2     Title:      Multi-Threaded Garbage Collector
3 
4     Copyright (c) 2010-12, 2019, 2020 David C. J. Matthews
5 
6     Based on the original garbage collector code
7         Copyright 2000-2008
8         Cambridge University Technical Services Limited
9 
10     This library is free software; you can redistribute it and/or
11     modify it under the terms of the GNU Lesser General Public
12     License as published by the Free Software Foundation; either
13     version 2.1 of the License, or (at your option) any later version.
14 
15     This library is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18     Lesser General Public License for more details.
19 
20     You should have received a copy of the GNU Lesser General Public
21     License along with this library; if not, write to the Free Software
22     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
23 
24 */
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #elif defined(_WIN32)
28 #include "winconfig.h"
29 #else
30 #error "No configuration file"
31 #endif
32 
33 #ifdef HAVE_ASSERT_H
34 #include <assert.h>
35 #define ASSERT(x)   assert(x)
36 #else
37 #define ASSERT(x)
38 #endif
39 
40 #include "globals.h"
41 #include "run_time.h"
42 #include "machine_dep.h"
43 #include "diagnostics.h"
44 #include "processes.h"
45 #include "timing.h"
46 #include "gc.h"
47 #include "scanaddrs.h"
48 #include "check_objects.h"
49 #include "osmem.h"
50 #include "bitmap.h"
51 #include "rts_module.h"
52 #include "memmgr.h"
53 #include "gctaskfarm.h"
54 #include "mpoly.h"
55 #include "statistics.h"
56 #include "profiling.h"
57 #include "heapsizing.h"
58 #include "gc_progress.h"
59 
60 static GCTaskFarm gTaskFarm; // Global task farm.
61 GCTaskFarm *gpTaskFarm = &gTaskFarm;
62 
63 // If the GC converts a weak ref from SOME to NONE it sets this ref.  It can be
64 // cleared by the signal handler thread.  There's no need for a lock since it
65 // is only set during GC and only cleared when not GCing.
66 bool convertedWeak = false;
67 
68 /*
69     How the garbage collector works.
70     The GC has two phases.  The minor (quick) GC is a copying collector that
71     copies data from the allocation area into the mutable and immutable area.
72     The major collector is started when either the mutable or the immutable
73     area is full.  The major collector uses a mark/sweep scheme.
74     The GC has three phases:
75 
76     1.  Mark phase.
77     Working from the roots; which are the the permanent mutable segments and
78     the RTS roots (e.g. thread stacks), mark all reachable cells.
79     Marking involves setting bits in the bitmap for reachable words.
80 
81     2. Compact phase.
82     Marked objects are copied to try to compact, upwards, the heap segments.  When
83     an object is moved the length word of the object in the old location is set as
84     a tombstone that points to its new location.  In particular this means that we
85     cannot reuse the space where an object previously was during the compaction phase.
86     Immutable objects are moved into immutable segments.  When an object is moved
87     to a new location the bits are set in the bitmap as though the object had been
88     marked at that location.
89 
90     3. Update phase.
91     The roots and objects marked during the first two phases are scanned and any
92     addresses for moved objects are updated.  The lowest address used in the area
93     then becomes the base of the area for future allocations.
94 
95     There is a sharing phase which may be performed before the mark phase.  This
96     merges immutable cells with the same contents with the aim of reducing the
97     size of the live data.  It is expensive so is not performed by default.
98 
99     Updated DCJM 12/06/12
100 
101 */
doGC(const POLYUNSIGNED wordsRequiredToAllocate)102 static bool doGC(const POLYUNSIGNED wordsRequiredToAllocate)
103 {
104     gHeapSizeParameters.RecordAtStartOfMajorGC();
105     gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeStart);
106     globalStats.incCount(PSC_GC_FULLGC);
107 
108     // Remove any empty spaces.  There will not normally be any except
109     // if we have triggered a full GC as a result of detecting paging in the
110     // minor GC but in that case we want to try to stop the system writing
111     // out areas that are now empty.
112     gMem.RemoveEmptyLocals();
113 
114     if (debugOptions & DEBUG_GC)
115         Log("GC: Full GC, %lu words required %" PRI_SIZET " spaces\n", wordsRequiredToAllocate, gMem.lSpaces.size());
116 
117     if (debugOptions & DEBUG_HEAPSIZE)
118         gMem.ReportHeapSizes("Full GC (before)");
119 
120     // Data sharing pass.
121     if (gHeapSizeParameters.PerformSharingPass())
122     {
123         globalStats.incCount(PSC_GC_SHARING);
124         GCSharingPhase();
125     }
126 
127     gcProgressBeginMajorGC(); // The GC sharing phase is treated separately
128 
129 /*
130  * There is a really weird bug somewhere.  An extra bit may be set in the bitmap during
131  * the mark phase.  It seems to be related to heavy swapping activity.  Duplicating the
132  * bitmap causes it to occur only in one copy and write-protecting the bitmap apart from
133  * when it is actually being updated does not result in a seg-fault.  So far I've only
134  * seen it on 64-bit Linux but it may be responsible for other crashes.  The work-around
135  * is to check the number of bits set in the bitmap and repeat the mark phase if it does
136  * not match.
137  */
138 
139     for (unsigned p = 3; p > 0; p--)
140     {
141         for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
142         {
143             LocalMemSpace *lSpace = *i;
144             ASSERT (lSpace->top >= lSpace->upperAllocPtr);
145             ASSERT (lSpace->upperAllocPtr >= lSpace->lowerAllocPtr);
146             ASSERT (lSpace->lowerAllocPtr >= lSpace->bottom);
147             // Set upper and lower limits of weak refs.
148             lSpace->highestWeak = lSpace->bottom;
149             lSpace->lowestWeak = lSpace->top;
150             lSpace->fullGCLowerLimit = lSpace->top;
151             // Put dummy objects in the unused space.  This allows
152             // us to scan over the whole of the space.
153             gMem.FillUnusedSpace(lSpace->lowerAllocPtr,
154                 lSpace->upperAllocPtr-lSpace->lowerAllocPtr);
155         }
156 
157         // Set limits of weak refs.
158         for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
159         {
160             PermanentMemSpace *pSpace = *i;
161             pSpace->highestWeak = pSpace->bottom;
162             pSpace->lowestWeak = pSpace->top;
163         }
164 
165         /* Mark phase */
166         GCMarkPhase();
167 
168         uintptr_t bitCount = 0, markCount = 0;
169 
170         for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
171         {
172             LocalMemSpace *lSpace = *i;
173             markCount += lSpace->i_marked + lSpace->m_marked;
174             bitCount += lSpace->bitmap.CountSetBits(lSpace->spaceSize());
175         }
176 
177         if (markCount == bitCount)
178             break;
179         else
180         {
181             // Report an error.  If this happens again we crash.
182             Log("GC: Count error mark count %lu, bitCount %lu\n", markCount, bitCount);
183             if (p == 1)
184             {
185                 ASSERT(markCount == bitCount);
186             }
187         }
188     }
189     for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
190     {
191         LocalMemSpace *lSpace = *i;
192         // Reset the allocation pointers.  They will be set to the
193         // limits of the retained data.
194 #ifdef POLYML32IN64
195         lSpace->lowerAllocPtr = lSpace->bottom+1; // Must be odd-word aligned
196         lSpace->lowerAllocPtr[-1] = PolyWord::FromUnsigned(0);
197 #else
198         lSpace->lowerAllocPtr = lSpace->bottom;
199 #endif
200         lSpace->upperAllocPtr = lSpace->top;
201     }
202 
203 	gcProgressSetPercent(25);
204 
205     if (debugOptions & DEBUG_GC) Log("GC: Check weak refs\n");
206     /* Detect unreferenced streams, windows etc. */
207     GCheckWeakRefs();
208 	gcProgressSetPercent(50);
209 
210     // Check that the heap is not overfull.  We make sure the marked
211     // mutable and immutable data is no more than 90% of the
212     // corresponding areas.  This is a very coarse adjustment.
213     {
214         uintptr_t iMarked = 0, mMarked = 0;
215         uintptr_t iSpace = 0, mSpace = 0;
216         for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
217         {
218             LocalMemSpace *lSpace = *i;
219             iMarked += lSpace->i_marked;
220             mMarked += lSpace->m_marked;
221             if (! lSpace->allocationSpace)
222             {
223                 if (lSpace->isMutable)
224                     mSpace += lSpace->spaceSize();
225                 else
226                     iSpace += lSpace->spaceSize();
227             }
228         }
229         // Add space if necessary and possible.
230         while (iMarked > iSpace - iSpace/10 && gHeapSizeParameters.AddSpaceBeforeCopyPhase(false) != 0)
231             iSpace += gMem.DefaultSpaceSize();
232         while (mMarked > mSpace - mSpace/10 && gHeapSizeParameters.AddSpaceBeforeCopyPhase(true) != 0)
233             mSpace += gMem.DefaultSpaceSize();
234     }
235 
236     /* Compact phase */
237     GCCopyPhase();
238 
239     gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Copy");
240 	gcProgressSetPercent(75);
241 
242     // Update Phase.
243     if (debugOptions & DEBUG_GC) Log("GC: Update\n");
244     GCUpdatePhase();
245 
246     gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Update");
247 
248     {
249         uintptr_t iUpdated = 0, mUpdated = 0, iMarked = 0, mMarked = 0;
250         for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
251         {
252             LocalMemSpace *lSpace = *i;
253             iMarked += lSpace->i_marked;
254             mMarked += lSpace->m_marked;
255             if (lSpace->isMutable)
256                 mUpdated += lSpace->updated;
257             else
258                 iUpdated += lSpace->updated;
259         }
260         ASSERT(iUpdated+mUpdated == iMarked+mMarked);
261     }
262 
263     // Delete empty spaces.
264     gMem.RemoveEmptyLocals();
265 
266     if (debugOptions & DEBUG_GC_ENHANCED)
267     {
268         for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
269         {
270             LocalMemSpace *lSpace = *i;
271             Log("GC: %s space %p %" PRI_SIZET " free in %" PRI_SIZET " words %2.1f%% full\n", lSpace->spaceTypeString(),
272                 lSpace, lSpace->freeSpace(), lSpace->spaceSize(),
273                 ((float)lSpace->allocatedSpace()) * 100 / (float)lSpace->spaceSize());
274         }
275     }
276 
277     // Compute values for statistics
278     globalStats.setSize(PSS_AFTER_LAST_GC, 0);
279     globalStats.setSize(PSS_AFTER_LAST_FULLGC, 0);
280     globalStats.setSize(PSS_ALLOCATION, 0);
281     globalStats.setSize(PSS_ALLOCATION_FREE, 0);
282 
283     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
284     {
285         LocalMemSpace *space = *i;
286         uintptr_t free = space->freeSpace();
287         globalStats.incSize(PSS_AFTER_LAST_GC, free*sizeof(PolyWord));
288         globalStats.incSize(PSS_AFTER_LAST_FULLGC, free*sizeof(PolyWord));
289         if (space->allocationSpace)
290         {
291             if (space->allocatedSpace() > space->freeSpace()) // It's more than half full
292                 gMem.ConvertAllocationSpaceToLocal(space);
293             else
294             {
295                 globalStats.incSize(PSS_ALLOCATION, free*sizeof(PolyWord));
296                 globalStats.incSize(PSS_ALLOCATION_FREE, free*sizeof(PolyWord));
297             }
298         }
299 #ifdef FILL_UNUSED_MEMORY
300         memset(space->bottom, 0xaa, (char*)space->upperAllocPtr - (char*)space->bottom);
301 #endif
302         if (debugOptions & DEBUG_GC_ENHANCED)
303             Log("GC: %s space %p %" PRI_SIZET " free in %" PRI_SIZET " words %2.1f%% full\n", space->spaceTypeString(),
304                 space, space->freeSpace(), space->spaceSize(),
305                 ((float)space->allocatedSpace()) * 100 / (float)space->spaceSize());
306     }
307 
308     // End of garbage collection
309     gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeEnd);
310 
311     // Now we've finished we can adjust the heap sizes.
312     gHeapSizeParameters.AdjustSizeAfterMajorGC(wordsRequiredToAllocate);
313     gHeapSizeParameters.resetMajorTimingData();
314 
315     bool haveSpace = gMem.CheckForAllocation(wordsRequiredToAllocate);
316 
317     // Invariant: the bitmaps are completely clean.
318     if (debugOptions & DEBUG_GC)
319     {
320         if (haveSpace)
321             Log("GC: Completed successfully\n");
322         else Log("GC: Completed with insufficient space\n");
323     }
324 
325     if (debugOptions & DEBUG_HEAPSIZE)
326         gMem.ReportHeapSizes("Full GC (after)");
327 
328 //    if (profileMode == kProfileLiveData || profileMode == kProfileLiveMutables)
329 //        printprofile();
330 
331     CheckMemory();
332 
333     return haveSpace; // Completed
334 }
335 
336 // Create the initial heap.  hsize, isize and msize are the requested heap sizes
337 // from the user arguments in units of kbytes.
338 // Fills in the defaults and attempts to allocate the heap.  If the heap size
339 // is too large it allocates as much as it can.  The default heap size is half the
340 // physical memory.
CreateHeap()341 void CreateHeap()
342 {
343     // Create an initial allocation space.
344     if (gMem.CreateAllocationSpace(gMem.DefaultSpaceSize()) == 0)
345         Exit("Insufficient memory to allocate the heap");
346 
347     // Create the task farm if required
348     if (userOptions.gcthreads != 1)
349     {
350         if (! gTaskFarm.Initialise(userOptions.gcthreads, 100))
351             Crash("Unable to initialise the GC task farm");
352     }
353     // Set up the stacks for the mark phase.
354     initialiseMarkerTables();
355 }
356 
357 class FullGCRequest: public MainThreadRequest
358 {
359 public:
FullGCRequest()360     FullGCRequest(): MainThreadRequest(MTP_GCPHASEMARK) {}
Perform()361     virtual void Perform()
362     {
363         doGC (0);
364     }
365 };
366 
367 class QuickGCRequest: public MainThreadRequest
368 {
369 public:
QuickGCRequest(POLYUNSIGNED words)370     QuickGCRequest(POLYUNSIGNED words): MainThreadRequest(MTP_GCPHASEMARK), wordsRequired(words) {}
371 
Perform()372     virtual void Perform()
373     {
374         result =
375 #ifndef DEBUG_ONLY_FULL_GC
376 // If DEBUG_ONLY_FULL_GC is defined then we skip the partial GC.
377             RunQuickGC(wordsRequired) ||
378 #endif
379             doGC (wordsRequired);
380     }
381 
382     bool result;
383     POLYUNSIGNED wordsRequired;
384 };
385 
386 // Perform a full garbage collection.  This is called either from ML via the full_gc RTS call
387 // or from various RTS functions such as open_file to try to recover dropped file handles.
FullGC(TaskData * taskData)388 void FullGC(TaskData *taskData)
389 {
390     FullGCRequest request;
391     processes->MakeRootRequest(taskData, &request);
392 
393     if (convertedWeak)
394         // Notify the signal thread to broadcast on the condition var when
395         // the GC is complete.  We mustn't call SignalArrived within the GC
396         // because it locks schedLock and the main GC thread already holds schedLock.
397         processes->SignalArrived();
398 }
399 
400 // This is the normal call when memory is exhausted and we need to garbage collect.
QuickGC(TaskData * taskData,POLYUNSIGNED wordsRequiredToAllocate)401 bool QuickGC(TaskData *taskData, POLYUNSIGNED wordsRequiredToAllocate)
402 {
403     QuickGCRequest request(wordsRequiredToAllocate);
404     processes->MakeRootRequest(taskData, &request);
405 
406     if (convertedWeak)
407         processes->SignalArrived();
408 
409     return request.result;
410 }
411 
412 // Called in RunShareData.  This is called as a root function
FullGCForShareCommonData(void)413 void FullGCForShareCommonData(void)
414 {
415     doGC(0);
416 }
417 
418 // RTS module for the GC.  Only used for ForkChild.
419 class GarbageCollectModule : public RtsModule
420 {
421 public:
422     virtual void ForkChild(void);
423 };
424 
425 // Set single threaded mode. This is only used in a child process after
426 // Posix fork in case there is a GC before the exec.
ForkChild(void)427 void GarbageCollectModule::ForkChild(void)
428 {
429     gpTaskFarm->SetSingleThreaded();
430     initialiseMarkerTables();
431 }
432 
433 // Declare this.  It will be automatically added to the table.
434 static GarbageCollectModule gcModule;
435