1 /*
2  * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
3  * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
4  * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
5  *
6  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
7  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
8  *
9  * Permission is hereby granted to use or copy this program
10  * for any purpose,  provided the above notices are retained on all copies.
11  * Permission to modify the code and to distribute modified code is granted,
12  * provided the above notices are retained, and a notice that the code was
13  * modified is included with the above copyright notice.
14  *
15  */
16 
17 #if defined(__MINGW32__) && !defined(__MINGW_EXCPT_DEFINE_PSDK) \
18     && defined(__i386__) /* cannot use macros from gcconfig.h */
19   /* Otherwise EXCEPTION_REGISTRATION type declaration from winnt.h     */
20   /* might be used.  That declaration has "handler" callback with NTAPI */
21   /* attribute.  The proper type (with "handler" field compatible with  */
22   /* GC mark_ex_handler) is declared in excpt.h.  The given macro is    */
23   /* defined before any system header include.                          */
24 # define __MINGW_EXCPT_DEFINE_PSDK 1
25 #endif
26 
27 #include "private/gc_pmark.h"
28 
29 #include <stdio.h>
30 
31 #if defined(MSWIN32) && defined(__GNUC__)
32 # include <excpt.h>
33 #endif
34 
35 /* Make arguments appear live to compiler.  Put here to minimize the    */
36 /* risk of inlining.  Used to minimize junk left in registers.          */
37 GC_ATTR_NOINLINE
void GC_noop6(word arg1 GC_ATTR_UNUSED, word arg2 GC_ATTR_UNUSED,
39               word arg3 GC_ATTR_UNUSED, word arg4 GC_ATTR_UNUSED,
40               word arg5 GC_ATTR_UNUSED, word arg6 GC_ATTR_UNUSED)
41 {
  /* Prevent GC_noop6 calls from being optimized away. */
43 # if defined(AO_HAVE_compiler_barrier) && !defined(BASE_ATOMIC_OPS_EMULATED)
44     AO_compiler_barrier(); /* to serve as a special side-effect */
45 # else
46     GC_noop1(0);
47 # endif
48 }
49 
50 volatile word GC_noop_sink;
51 
52 /* Single argument version, robust against whole program analysis. */
53 GC_ATTR_NO_SANITIZE_THREAD
GC_API void GC_CALL GC_noop1(word x)
55 {
56     GC_noop_sink = x;
57 }
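
/* Illustrative client-side use (not part of this file): passing a      */
/* pointer through GC_noop1 keeps it visibly live, e.g.                 */
/*   GC_noop1((GC_word)p);                                              */
/* The GC_reachable_here() macro in gc.h expands to essentially this.   */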
58 
59 /* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */
60 
61 GC_INNER unsigned GC_n_mark_procs = GC_RESERVED_MARK_PROCS;
62 
/* Initialize GC_obj_kinds and the standard free lists properly.        */
64 /* This must be done statically since they may be accessed before       */
65 /* GC_init is called.                                                   */
66 /* It's done here, since we need to deal with mark descriptors.         */
67 GC_INNER struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
68 /* PTRFREE */ { &GC_aobjfreelist[0], 0 /* filled in dynamically */,
69                 /* 0 | */ GC_DS_LENGTH, FALSE, FALSE
70                 /*, */ OK_DISCLAIM_INITZ },
71 /* NORMAL */  { &GC_objfreelist[0], 0,
72                 /* 0 | */ GC_DS_LENGTH,
73                                 /* adjusted in GC_init for EXTRA_BYTES  */
74                 TRUE /* add length to descr */, TRUE
75                 /*, */ OK_DISCLAIM_INITZ },
76 /* UNCOLLECTABLE */
77               { &GC_uobjfreelist[0], 0,
78                 /* 0 | */ GC_DS_LENGTH, TRUE /* add length to descr */, TRUE
79                 /*, */ OK_DISCLAIM_INITZ },
80 # ifdef GC_ATOMIC_UNCOLLECTABLE
81               { &GC_auobjfreelist[0], 0,
82                 /* 0 | */ GC_DS_LENGTH, FALSE /* add length to descr */, FALSE
83                 /*, */ OK_DISCLAIM_INITZ },
84 # endif
85 };
86 
87 GC_INNER unsigned GC_n_kinds = GC_N_KINDS_INITIAL_VALUE;
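
/* Additional kinds are normally appended at run time through the       */
/* public GC_new_kind() API (declared in gc_mark.h).  A minimal,        */
/* hypothetical client sketch with the same field meanings as the       */
/* static entries above (simple length descriptor, add length to        */
/* descr, clear new objects):                                           */
/*   void **my_fl = GC_new_free_list();                                 */
/*   unsigned my_kind = GC_new_kind(my_fl, 0 | GC_DS_LENGTH, 1, 1);     */
/*   void *obj = GC_generic_malloc(24, (int)my_kind);                   */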
88 
89 # ifndef INITIAL_MARK_STACK_SIZE
90 #   define INITIAL_MARK_STACK_SIZE (1*HBLKSIZE)
91                 /* INITIAL_MARK_STACK_SIZE * sizeof(mse) should be a    */
92                 /* multiple of HBLKSIZE.                                */
93                 /* The incremental collector actually likes a larger    */
94                 /* size, since it wants to push all marked dirty        */
95                 /* objects before marking anything new.  Currently we   */
96                 /* let it grow dynamically.                             */
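                /* For instance, with the common HBLKSIZE of 4096 and  */
                /* a two-word mse (16 bytes on a typical 64-bit        */
                /* target), this is 4096 entries, i.e. 64 KiB, which   */
                /* is indeed an exact multiple of HBLKSIZE.  (Both     */
                /* figures are configuration-dependent and given only  */
                /* for illustration.)                                  */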
97 # endif
98 
99 #if !defined(GC_DISABLE_INCREMENTAL)
100   STATIC word GC_n_rescuing_pages = 0;
                                /* Number of dirty pages we marked from; */
                                /* excludes ptrfree pages, etc.          */
103                                 /* Used for logging only.               */
104 #endif
105 
106 GC_INNER size_t GC_mark_stack_size = 0;
107 
108 #ifdef PARALLEL_MARK
109   STATIC volatile AO_t GC_first_nonempty = 0;
110         /* Lowest entry on mark stack   */
111         /* that may be nonempty.        */
112         /* Updated only by initiating   */
113         /* thread.                      */
114 #endif
115 
116 GC_INNER mark_state_t GC_mark_state = MS_NONE;
117 
118 GC_INNER GC_bool GC_mark_stack_too_small = FALSE;
119 
120 static struct hblk * scan_ptr;
121 
122 STATIC GC_bool GC_objects_are_marked = FALSE;
123                 /* Are there collectible marked objects in the heap?    */
124 
125 /* Is a collection in progress?  Note that this can return true in the  */
126 /* non-incremental case, if a collection has been abandoned and the     */
127 /* mark state is now MS_INVALID.                                        */
GC_INNER GC_bool GC_collection_in_progress(void)
129 {
130     return(GC_mark_state != MS_NONE);
131 }
132 
133 /* clear all mark bits in the header */
GC_INNER void GC_clear_hdr_marks(hdr *hhdr)
135 {
136   size_t last_bit;
137 
138 # ifdef AO_HAVE_load
139     /* Atomic access is used to avoid racing with GC_realloc.   */
140     last_bit = FINAL_MARK_BIT((size_t)AO_load((volatile AO_t *)&hhdr->hb_sz));
141 # else
142     /* No race as GC_realloc holds the lock while updating hb_sz.   */
143     last_bit = FINAL_MARK_BIT((size_t)hhdr->hb_sz);
144 # endif
145 
146     BZERO(hhdr -> hb_marks, sizeof(hhdr->hb_marks));
147     set_mark_bit_from_hdr(hhdr, last_bit);
148     hhdr -> hb_n_marks = 0;
149 }
150 
151 /* Set all mark bits in the header.  Used for uncollectible blocks. */
GC_INNER void GC_set_hdr_marks(hdr *hhdr)
153 {
154     unsigned i;
155     size_t sz = (size_t)hhdr->hb_sz;
156     unsigned n_marks = (unsigned)FINAL_MARK_BIT(sz);
157 
158 #   ifdef USE_MARK_BYTES
159       for (i = 0; i <= n_marks; i += (unsigned)MARK_BIT_OFFSET(sz)) {
160         hhdr -> hb_marks[i] = 1;
161       }
162 #   else
163       for (i = 0; i < divWORDSZ(n_marks + WORDSZ); ++i) {
164         hhdr -> hb_marks[i] = GC_WORD_MAX;
165       }
166 #   endif
167 #   ifdef MARK_BIT_PER_OBJ
168       hhdr -> hb_n_marks = n_marks;
169 #   else
170       hhdr -> hb_n_marks = HBLK_OBJS(sz);
171 #   endif
172 }
173 
174 /*
175  * Clear all mark bits associated with block h.
176  */
static void clear_marks_for_block(struct hblk *h, word dummy GC_ATTR_UNUSED)
178 {
179     hdr * hhdr = HDR(h);
180 
181     if (IS_UNCOLLECTABLE(hhdr -> hb_obj_kind)) return;
182         /* Mark bit for these is cleared only once the object is        */
183         /* explicitly deallocated.  This either frees the block, or     */
184         /* the bit is cleared once the object is on the free list.      */
185     GC_clear_hdr_marks(hhdr);
186 }
187 
188 /* Slow but general routines for setting/clearing/asking about mark bits */
GC_API void GC_CALL GC_set_mark_bit(const void *p)
190 {
191     struct hblk *h = HBLKPTR(p);
192     hdr * hhdr = HDR(h);
193     word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);
194 
195     if (!mark_bit_from_hdr(hhdr, bit_no)) {
196       set_mark_bit_from_hdr(hhdr, bit_no);
197       ++hhdr -> hb_n_marks;
198     }
199 }
200 
GC_API void GC_CALL GC_clear_mark_bit(const void *p)
202 {
203     struct hblk *h = HBLKPTR(p);
204     hdr * hhdr = HDR(h);
205     word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);
206 
207     if (mark_bit_from_hdr(hhdr, bit_no)) {
208       size_t n_marks = hhdr -> hb_n_marks;
209 
210       GC_ASSERT(n_marks != 0);
211       clear_mark_bit_from_hdr(hhdr, bit_no);
212       n_marks--;
213 #     ifdef PARALLEL_MARK
214         if (n_marks != 0 || !GC_parallel)
215           hhdr -> hb_n_marks = n_marks;
216         /* Don't decrement to zero.  The counts are approximate due to  */
217         /* concurrency issues, but we need to ensure that a count of    */
218         /* zero implies an empty block.                                 */
219 #     else
220           hhdr -> hb_n_marks = n_marks;
221 #     endif
222     }
223 }
224 
GC_API int GC_CALL GC_is_marked(const void *p)
226 {
227     struct hblk *h = HBLKPTR(p);
228     hdr * hhdr = HDR(h);
229     word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);
230 
231     return (int)mark_bit_from_hdr(hhdr, bit_no); /* 0 or 1 */
232 }
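
/* Illustrative only: these three routines form the public slow-path    */
/* mark-bit interface (declared in gc_mark.h).  E.g., a hypothetical    */
/* debugging hook could pin an object for the current cycle with        */
/*   if (!GC_is_marked(q)) GC_set_mark_bit(q);                          */
/* where q is an object base address (as returned by GC_base()).        */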
233 
234 /*
235  * Clear mark bits in all allocated heap blocks.  This invalidates
236  * the marker invariant, and sets GC_mark_state to reflect this.
237  * (This implicitly starts marking to reestablish the invariant.)
238  */
GC_INNER void GC_clear_marks(void)
240 {
241     GC_apply_to_all_blocks(clear_marks_for_block, (word)0);
242     GC_objects_are_marked = FALSE;
243     GC_mark_state = MS_INVALID;
244     scan_ptr = 0;
245 }
246 
247 /* Initiate a garbage collection.  Initiates a full collection if the   */
248 /* mark state is invalid.                                               */
GC_INNER void GC_initiate_gc(void)
250 {
251     GC_ASSERT(I_HOLD_LOCK());
252 #   ifndef GC_DISABLE_INCREMENTAL
253         if (GC_incremental) {
254 #         ifdef CHECKSUMS
255             GC_read_dirty(FALSE);
256 #         else
257             GC_read_dirty(GC_mark_state == MS_INVALID);
258 #         endif
259         }
260 #   endif
261 #   ifdef CHECKSUMS
262         if (GC_incremental) GC_check_dirty();
263 #   endif
264 #   if !defined(GC_DISABLE_INCREMENTAL)
265         GC_n_rescuing_pages = 0;
266 #   endif
267     if (GC_mark_state == MS_NONE) {
268         GC_mark_state = MS_PUSH_RESCUERS;
269     } else if (GC_mark_state != MS_INVALID) {
270         ABORT("Unexpected state");
271     } /* else this is really a full collection, and mark        */
272       /* bits are invalid.                                      */
273     scan_ptr = 0;
274 }
275 
276 #ifdef PARALLEL_MARK
277     STATIC void GC_do_parallel_mark(void); /* initiate parallel marking. */
278 #endif /* PARALLEL_MARK */
279 
280 #ifdef GC_DISABLE_INCREMENTAL
281 # define GC_push_next_marked_dirty(h) GC_push_next_marked(h)
282 #else
283   STATIC struct hblk * GC_push_next_marked_dirty(struct hblk *h);
284                 /* Invoke GC_push_marked on next dirty block above h.   */
285                 /* Return a pointer just past the end of this block.    */
286 #endif /* !GC_DISABLE_INCREMENTAL */
287 STATIC struct hblk * GC_push_next_marked(struct hblk *h);
288                 /* Ditto, but also mark from clean pages.       */
289 STATIC struct hblk * GC_push_next_marked_uncollectable(struct hblk *h);
290                 /* Ditto, but mark only from uncollectible pages.       */
291 
292 static void alloc_mark_stack(size_t);
293 
294 /* Perform a small amount of marking.                   */
295 /* We try to touch roughly a page of memory.            */
296 /* Return TRUE if we just finished a mark phase.        */
297 /* Cold_gc_frame is an address inside a GC frame that   */
298 /* remains valid until all marking is complete.         */
299 /* A zero value indicates that it's OK to miss some     */
300 /* register values.                                     */
301 /* We hold the allocation lock.  In the case of         */
302 /* incremental collection, the world may not be stopped.*/
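/* Rough sketch of the state machine driven below: GC_initiate_gc()     */
/* moves MS_NONE to MS_PUSH_RESCUERS; once all dirty blocks have been   */
/* scanned, the roots are pushed and we move to MS_ROOTS_PUSHED; when   */
/* the mark stack finally drains we return to MS_NONE and report        */
/* completion.  MS_INVALID/MS_PARTIALLY_INVALID (entered, e.g., after a */
/* mark stack overflow or GC_clear_marks()) force rescanning from       */
/* already marked objects, going through MS_PUSH_UNCOLLECTABLE while no */
/* objects are marked yet, before the roots are pushed again.           */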
303 #ifdef WRAP_MARK_SOME
304   /* For win32, this is called after we establish a structured  */
305   /* exception handler, in case Windows unmaps one of our root  */
306   /* segments.  See below.  In either case, we acquire the      */
307   /* allocator lock long before we get here.                    */
  STATIC GC_bool GC_mark_some_inner(ptr_t cold_gc_frame)
309 #else
310   GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame)
311 #endif
312 {
313     switch(GC_mark_state) {
314         case MS_NONE:
315             break;
316 
317         case MS_PUSH_RESCUERS:
318             if ((word)GC_mark_stack_top
319                 >= (word)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/2)) {
320                 /* Go ahead and mark, even though that might cause us to */
321                 /* see more marked dirty objects later on.  Avoid this   */
322                 /* in the future.                                        */
323                 GC_mark_stack_too_small = TRUE;
324                 MARK_FROM_MARK_STACK();
325                 break;
326             } else {
327                 scan_ptr = GC_push_next_marked_dirty(scan_ptr);
328                 if (scan_ptr == 0) {
329 #                 if !defined(GC_DISABLE_INCREMENTAL)
330                     GC_COND_LOG_PRINTF("Marked from %lu dirty pages\n",
331                                        (unsigned long)GC_n_rescuing_pages);
332 #                 endif
333                     GC_push_roots(FALSE, cold_gc_frame);
334                     GC_objects_are_marked = TRUE;
335                     if (GC_mark_state != MS_INVALID) {
336                         GC_mark_state = MS_ROOTS_PUSHED;
337                     }
338                 }
339             }
340             break;
341 
342         case MS_PUSH_UNCOLLECTABLE:
343             if ((word)GC_mark_stack_top
344                 >= (word)(GC_mark_stack + GC_mark_stack_size/4)) {
345 #               ifdef PARALLEL_MARK
346                   /* Avoid this, since we don't parallelize the marker  */
347                   /* here.                                              */
348                   if (GC_parallel) GC_mark_stack_too_small = TRUE;
349 #               endif
350                 MARK_FROM_MARK_STACK();
351                 break;
352             } else {
353                 scan_ptr = GC_push_next_marked_uncollectable(scan_ptr);
354                 if (scan_ptr == 0) {
355                     GC_push_roots(TRUE, cold_gc_frame);
356                     GC_objects_are_marked = TRUE;
357                     if (GC_mark_state != MS_INVALID) {
358                         GC_mark_state = MS_ROOTS_PUSHED;
359                     }
360                 }
361             }
362             break;
363 
364         case MS_ROOTS_PUSHED:
365 #           ifdef PARALLEL_MARK
366               /* In the incremental GC case, this currently doesn't     */
367               /* quite do the right thing, since it runs to             */
368               /* completion.  On the other hand, starting a             */
369               /* parallel marker is expensive, so perhaps it is         */
370               /* the right thing?                                       */
371               /* Eventually, incremental marking should run             */
372               /* asynchronously in multiple threads, without grabbing   */
373               /* the allocation lock.                                   */
374                 if (GC_parallel) {
375                   GC_do_parallel_mark();
376                   GC_ASSERT((word)GC_mark_stack_top < (word)GC_first_nonempty);
377                   GC_mark_stack_top = GC_mark_stack - 1;
378                   if (GC_mark_stack_too_small) {
379                     alloc_mark_stack(2*GC_mark_stack_size);
380                   }
381                   if (GC_mark_state == MS_ROOTS_PUSHED) {
382                     GC_mark_state = MS_NONE;
383                     return(TRUE);
384                   }
385                   break;
386                 }
387 #           endif
388             if ((word)GC_mark_stack_top >= (word)GC_mark_stack) {
389                 MARK_FROM_MARK_STACK();
390                 break;
391             } else {
392                 GC_mark_state = MS_NONE;
393                 if (GC_mark_stack_too_small) {
394                     alloc_mark_stack(2*GC_mark_stack_size);
395                 }
396                 return(TRUE);
397             }
398 
399         case MS_INVALID:
400         case MS_PARTIALLY_INVALID:
401             if (!GC_objects_are_marked) {
402                 GC_mark_state = MS_PUSH_UNCOLLECTABLE;
403                 break;
404             }
405             if ((word)GC_mark_stack_top >= (word)GC_mark_stack) {
406                 MARK_FROM_MARK_STACK();
407                 break;
408             }
409             if (scan_ptr == 0 && GC_mark_state == MS_INVALID) {
410                 /* About to start a heap scan for marked objects. */
411                 /* Mark stack is empty.  OK to reallocate.        */
412                 if (GC_mark_stack_too_small) {
413                     alloc_mark_stack(2*GC_mark_stack_size);
414                 }
415                 GC_mark_state = MS_PARTIALLY_INVALID;
416             }
417             scan_ptr = GC_push_next_marked(scan_ptr);
418             if (scan_ptr == 0 && GC_mark_state == MS_PARTIALLY_INVALID) {
419                 GC_push_roots(TRUE, cold_gc_frame);
420                 GC_objects_are_marked = TRUE;
421                 if (GC_mark_state != MS_INVALID) {
422                     GC_mark_state = MS_ROOTS_PUSHED;
423                 }
424             }
425             break;
426 
427         default:
428             ABORT("GC_mark_some: bad state");
429     }
430     return(FALSE);
431 }
432 
433 #ifdef WRAP_MARK_SOME
434 
435 # if (defined(MSWIN32) || defined(MSWINCE)) && defined(__GNUC__)
436 
437     typedef struct {
438       EXCEPTION_REGISTRATION ex_reg;
439       void *alt_path;
440     } ext_ex_regn;
441 
    static EXCEPTION_DISPOSITION mark_ex_handler(
443         struct _EXCEPTION_RECORD *ex_rec,
444         void *est_frame,
445         struct _CONTEXT *context,
446         void *disp_ctxt GC_ATTR_UNUSED)
447     {
448         if (ex_rec->ExceptionCode == STATUS_ACCESS_VIOLATION) {
449           ext_ex_regn *xer = (ext_ex_regn *)est_frame;
450 
451           /* Unwind from the inner function assuming the standard */
452           /* function prologue.                                   */
453           /* Assumes code has not been compiled with              */
454           /* -fomit-frame-pointer.                                */
455           context->Esp = context->Ebp;
456           context->Ebp = *((DWORD *)context->Esp);
457           context->Esp = context->Esp - 8;
458 
459           /* Resume execution at the "real" handler within the    */
460           /* wrapper function.                                    */
461           context->Eip = (DWORD )(xer->alt_path);
462 
463           return ExceptionContinueExecution;
464 
465         } else {
466             return ExceptionContinueSearch;
467         }
468     }
469 # endif /* __GNUC__ && MSWIN32 */
470 
  GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame)
472   {
473       GC_bool ret_val;
474 
475 #   if defined(MSWIN32) || defined(MSWINCE)
476 #    ifndef __GNUC__
477       /* Windows 98 appears to asynchronously create and remove  */
478       /* writable memory mappings, for reasons we haven't yet    */
479       /* understood.  Since we look for writable regions to      */
480       /* determine the root set, we may try to mark from an      */
481       /* address range that disappeared since we started the     */
482       /* collection.  Thus we have to recover from faults here.  */
483       /* This code does not appear to be necessary for Windows   */
484       /* 95/NT/2000+. Note that this code should never generate  */
485       /* an incremental GC write fault.                          */
486       /* This code seems to be necessary for WinCE (at least in  */
487       /* the case we'd decide to add MEM_PRIVATE sections to     */
488       /* data roots in GC_register_dynamic_libraries()).         */
489       /* It's conceivable that this is the same issue with       */
490       /* terminating threads that we see with Linux and          */
491       /* USE_PROC_FOR_LIBRARIES.                                 */
492 
493       __try {
494           ret_val = GC_mark_some_inner(cold_gc_frame);
495       } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
496                 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
497           goto handle_ex;
498       }
499 #     if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
500         /* With DllMain-based thread tracking, a thread may have        */
501         /* started while we were marking.  This is logically equivalent */
502         /* to the exception case; our results are invalid and we have   */
503         /* to start over.  This cannot be prevented since we can't      */
504         /* block in DllMain.                                            */
505         if (GC_started_thread_while_stopped())
506           goto handle_thr_start;
507 #     endif
508      rm_handler:
509       return ret_val;
510 
511 #    else /* __GNUC__ */
512 
513       /* Manually install an exception handler since GCC does    */
514       /* not yet support Structured Exception Handling (SEH) on  */
515       /* Win32.                                                  */
516 
517       ext_ex_regn er;
518 
519 #     if GC_GNUC_PREREQ(4, 7) || GC_CLANG_PREREQ(3, 3)
520 #       pragma GCC diagnostic push
521         /* Suppress "taking the address of label is non-standard" warning. */
522 #       if defined(__clang__) || GC_GNUC_PREREQ(6, 4)
523 #         pragma GCC diagnostic ignored "-Wpedantic"
524 #       else
525           /* GCC before ~4.8 does not accept "-Wpedantic" quietly.  */
526 #         pragma GCC diagnostic ignored "-pedantic"
527 #       endif
528         er.alt_path = &&handle_ex;
529 #       pragma GCC diagnostic pop
530 #     elif !defined(CPPCHECK) /* pragma diagnostic is not supported */
531         er.alt_path = &&handle_ex;
532 #     endif
533       er.ex_reg.handler = mark_ex_handler;
534       __asm__ __volatile__ ("movl %%fs:0, %0" : "=r" (er.ex_reg.prev));
535       __asm__ __volatile__ ("movl %0, %%fs:0" : : "r" (&er));
536       ret_val = GC_mark_some_inner(cold_gc_frame);
537       /* Prevent GCC from considering the following code unreachable */
538       /* and thus eliminating it.                                    */
539         if (er.alt_path == 0)
540           goto handle_ex;
541 #     if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
542         if (GC_started_thread_while_stopped())
543           goto handle_thr_start;
544 #     endif
545     rm_handler:
546       /* Uninstall the exception handler */
547       __asm__ __volatile__ ("mov %0, %%fs:0" : : "r" (er.ex_reg.prev));
548       return ret_val;
549 
550 #    endif /* __GNUC__ */
551 #   else /* !MSWIN32 */
552       /* Here we are handling the case in which /proc is used for root  */
553       /* finding, and we have threads.  We may find a stack for a       */
554       /* thread that is in the process of exiting, and disappears       */
555       /* while we are marking it.  This seems extremely difficult to    */
556       /* avoid otherwise.                                               */
557 #     ifndef DEFAULT_VDB
558         if (GC_auto_incremental) {
559           static GC_bool is_warned = FALSE;
560 
561           if (!is_warned) {
562             is_warned = TRUE;
563             WARN("Incremental GC incompatible with /proc roots\n", 0);
564           }
565           /* I'm not sure if this could still work ...  */
566         }
567 #     endif
568       GC_setup_temporary_fault_handler();
569       if(SETJMP(GC_jmp_buf) != 0) goto handle_ex;
570       ret_val = GC_mark_some_inner(cold_gc_frame);
571     rm_handler:
572       GC_reset_fault_handler();
573       return ret_val;
574 
575 #   endif /* !MSWIN32 */
576 
577 handle_ex:
578     /* Exception handler starts here for all cases. */
579       {
580         static word warned_gc_no;
581 
582         /* Warn about it at most once per collection. */
583         if (warned_gc_no != GC_gc_no) {
584           warned_gc_no = GC_gc_no;
585           WARN("Caught ACCESS_VIOLATION in marker;"
586                " memory mapping disappeared\n", 0);
587         }
588       }
589 #   if (defined(MSWIN32) || defined(MSWINCE)) && defined(GC_WIN32_THREADS) \
590        && !defined(GC_PTHREADS)
591       handle_thr_start:
592 #   endif
593       /* We have bad roots on the stack.  Discard mark stack.   */
594       /* Rescan from marked objects.  Redetermine roots.        */
595 #     ifdef REGISTER_LIBRARIES_EARLY
596         START_WORLD();
597         GC_cond_register_dynamic_libraries();
598         STOP_WORLD();
599 #     endif
600       GC_invalidate_mark_state();
601       scan_ptr = 0;
602 
603       ret_val = FALSE;
604       goto rm_handler;  /* Back to platform-specific code. */
605   }
606 #endif /* WRAP_MARK_SOME */
607 
GC_INNER void GC_invalidate_mark_state(void)
609 {
610     GC_mark_state = MS_INVALID;
611     GC_mark_stack_top = GC_mark_stack-1;
612 }
613 
GC_INNER mse * GC_signal_mark_stack_overflow(mse *msp)
615 {
616     GC_mark_state = MS_INVALID;
617 #   ifdef PARALLEL_MARK
618       /* We are using a local_mark_stack in parallel mode, so   */
619       /* do not signal the global mark stack to be resized.     */
620       /* That will be done if required in GC_return_mark_stack. */
621       if (!GC_parallel)
622         GC_mark_stack_too_small = TRUE;
623 #   else
624       GC_mark_stack_too_small = TRUE;
625 #   endif
626     GC_COND_LOG_PRINTF("Mark stack overflow; current size = %lu entries\n",
627                        (unsigned long)GC_mark_stack_size);
628     return(msp - GC_MARK_STACK_DISCARDS);
629 }
630 
631 /*
632  * Mark objects pointed to by the regions described by
633  * mark stack entries between mark_stack and mark_stack_top,
634  * inclusive.  Assumes the upper limit of a mark stack entry
635  * is never 0.  A mark stack entry never has size 0.
636  * We try to traverse on the order of a hblk of memory before we return.
637  * Caller is responsible for calling this until the mark stack is empty.
638  * Note that this is the most performance critical routine in the
639  * collector.  Hence it contains all sorts of ugly hacks to speed
640  * things up.  In particular, we avoid procedure calls on the common
641  * path, we take advantage of peculiarities of the mark descriptor
642  * encoding, we optionally maintain a cache for the block address to
643  * header mapping, we prefetch when an object is "grayed", etc.
644  */
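/* As a reminder of the descriptor encoding handled below (the low two  */
/* bits, GC_DS_TAGS, select the variant; see gc_mark.h): GC_DS_LENGTH   */
/* (0) means the descriptor is just the object length in bytes, so a    */
/* 32-byte NORMAL object is described by descr == 32; GC_DS_BITMAP      */
/* means the remaining high bits form a bitmap of the pointer-holding   */
/* words, most significant bit first; GC_DS_PROC means a user-supplied  */
/* mark procedure is invoked; GC_DS_PER_OBJECT means the real           */
/* descriptor is stored in (or reached through) the object itself.      */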
645 GC_ATTR_NO_SANITIZE_ADDR GC_ATTR_NO_SANITIZE_MEMORY GC_ATTR_NO_SANITIZE_THREAD
GC_INNER mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack,
647                             mse *mark_stack_limit)
648 {
649   signed_word credit = HBLKSIZE;  /* Remaining credit for marking work  */
650   ptr_t current_p;      /* Pointer to current candidate ptr.            */
651   word current;         /* Candidate pointer.                           */
652   ptr_t limit = 0;      /* (Incl) limit of current candidate range.     */
653   word descr;
654   ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
655   ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
656   DECLARE_HDR_CACHE;
657 
658 # define SPLIT_RANGE_WORDS 128  /* Must be power of 2.          */
659 
660   GC_objects_are_marked = TRUE;
661   INIT_HDR_CACHE;
662 # ifdef OS2 /* Use untweaked version to circumvent compiler problem */
663     while ((word)mark_stack_top >= (word)mark_stack && credit >= 0)
664 # else
665     while (((((word)mark_stack_top - (word)mark_stack) | (word)credit)
666             & SIGNB) == 0)
667 # endif
668   {
669     current_p = mark_stack_top -> mse_start;
670     descr = mark_stack_top -> mse_descr.w;
671   retry:
672     /* current_p and descr describe the current object.         */
673     /* *mark_stack_top is vacant.                               */
674     /* The following is 0 only for small objects described by a simple  */
675     /* length descriptor.  For many applications this is the common     */
676     /* case, so we try to detect it quickly.                            */
677     if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | GC_DS_TAGS)) {
678       word tag = descr & GC_DS_TAGS;
679 
680       GC_STATIC_ASSERT(GC_DS_TAGS == 0x3);
681       switch(tag) {
682         case GC_DS_LENGTH:
683           /* Large length.                                              */
684           /* Process part of the range to avoid pushing too much on the */
685           /* stack.                                                     */
686           GC_ASSERT(descr < (word)GC_greatest_plausible_heap_addr
687                             - (word)GC_least_plausible_heap_addr
688                 || (word)(current_p + descr)
689                             <= (word)GC_least_plausible_heap_addr
690                 || (word)current_p >= (word)GC_greatest_plausible_heap_addr);
691 #         ifdef PARALLEL_MARK
692 #           define SHARE_BYTES 2048
693             if (descr > SHARE_BYTES && GC_parallel
694                 && (word)mark_stack_top < (word)(mark_stack_limit - 1)) {
695               word new_size = (descr/2) & ~(word)(sizeof(word)-1);
696 
697               mark_stack_top -> mse_start = current_p;
698               mark_stack_top -> mse_descr.w = new_size + sizeof(word);
699                                         /* makes sure we handle         */
700                                         /* misaligned pointers.         */
701               mark_stack_top++;
702 #             ifdef ENABLE_TRACE
703                 if ((word)GC_trace_addr >= (word)current_p
704                     && (word)GC_trace_addr < (word)(current_p + descr)) {
705                   GC_log_printf("GC #%u: large section; start %p, len %lu,"
706                                 " splitting (parallel) at %p\n",
707                                 (unsigned)GC_gc_no, (void *)current_p,
708                                 (unsigned long)descr,
709                                 (void *)(current_p + new_size));
710                 }
711 #             endif
712               current_p += new_size;
713               descr -= new_size;
714               goto retry;
715             }
716 #         endif /* PARALLEL_MARK */
717           mark_stack_top -> mse_start =
718                 limit = current_p + WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
719           mark_stack_top -> mse_descr.w =
720                                 descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
721 #         ifdef ENABLE_TRACE
722             if ((word)GC_trace_addr >= (word)current_p
723                 && (word)GC_trace_addr < (word)(current_p + descr)) {
724               GC_log_printf("GC #%u: large section; start %p, len %lu,"
725                             " splitting at %p\n",
726                             (unsigned)GC_gc_no, (void *)current_p,
727                             (unsigned long)descr, (void *)limit);
728             }
729 #         endif
730           /* Make sure that pointers overlapping the two ranges are     */
731           /* considered.                                                */
732           limit += sizeof(word) - ALIGNMENT;
733           break;
734         case GC_DS_BITMAP:
735           mark_stack_top--;
736 #         ifdef ENABLE_TRACE
737             if ((word)GC_trace_addr >= (word)current_p
738                 && (word)GC_trace_addr < (word)(current_p
739                                                 + WORDS_TO_BYTES(WORDSZ-2))) {
740               GC_log_printf("GC #%u: tracing from %p bitmap descr %lu\n",
741                             (unsigned)GC_gc_no, (void *)current_p,
742                             (unsigned long)descr);
743             }
744 #         endif /* ENABLE_TRACE */
745           descr &= ~GC_DS_TAGS;
746           credit -= WORDS_TO_BYTES(WORDSZ/2); /* guess */
747           while (descr != 0) {
748             if ((descr & SIGNB) != 0) {
749               current = *(word *)current_p;
750               FIXUP_POINTER(current);
751               if (current >= (word)least_ha && current < (word)greatest_ha) {
752                 PREFETCH((ptr_t)current);
753 #               ifdef ENABLE_TRACE
754                   if (GC_trace_addr == current_p) {
755                     GC_log_printf("GC #%u: considering(3) %p -> %p\n",
756                                   (unsigned)GC_gc_no, (void *)current_p,
757                                   (void *)current);
758                   }
759 #               endif /* ENABLE_TRACE */
760                 PUSH_CONTENTS((ptr_t)current, mark_stack_top,
761                               mark_stack_limit, current_p);
762               }
763             }
764             descr <<= 1;
765             current_p += sizeof(word);
766           }
767           continue;
768         case GC_DS_PROC:
769           mark_stack_top--;
770 #         ifdef ENABLE_TRACE
771             if ((word)GC_trace_addr >= (word)current_p
772                 && GC_base(current_p) != 0
773                 && GC_base(current_p) == GC_base(GC_trace_addr)) {
774               GC_log_printf("GC #%u: tracing from %p, proc descr %lu\n",
775                             (unsigned)GC_gc_no, (void *)current_p,
776                             (unsigned long)descr);
777             }
778 #         endif /* ENABLE_TRACE */
779           credit -= GC_PROC_BYTES;
780           mark_stack_top = (*PROC(descr))((word *)current_p, mark_stack_top,
781                                           mark_stack_limit, ENV(descr));
782           continue;
783         case GC_DS_PER_OBJECT:
784           if ((signed_word)descr >= 0) {
785             /* Descriptor is in the object.     */
786             descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
787           } else {
788             /* Descriptor is in type descriptor pointed to by first     */
789             /* word in object.                                          */
790             ptr_t type_descr = *(ptr_t *)current_p;
791             /* type_descr is either a valid pointer to the descriptor   */
792             /* structure, or this object was on a free list.            */
793             /* If it was anything but the last object on the free list, */
794             /* we will misinterpret the next object on the free list as */
795             /* the type descriptor, and get a 0 GC descriptor, which    */
796             /* is ideal.  Unfortunately, we need to check for the last  */
797             /* object case explicitly.                                  */
798             if (EXPECT(0 == type_descr, FALSE)) {
799                 mark_stack_top--;
800                 continue;
801             }
802             descr = *(word *)(type_descr
803                               - ((signed_word)descr + (GC_INDIR_PER_OBJ_BIAS
804                                                        - GC_DS_PER_OBJECT)));
805           }
806           if (0 == descr) {
807               /* Can happen either because we generated a 0 descriptor  */
808               /* or we saw a pointer to a free object.          */
809               mark_stack_top--;
810               continue;
811           }
812           goto retry;
813       }
814     } else /* Small object with length descriptor */ {
815       mark_stack_top--;
816 #     ifndef SMALL_CONFIG
817         if (descr < sizeof(word))
818           continue;
819 #     endif
820 #     ifdef ENABLE_TRACE
821         if ((word)GC_trace_addr >= (word)current_p
822             && (word)GC_trace_addr < (word)(current_p + descr)) {
823           GC_log_printf("GC #%u: small object; start %p, len %lu\n",
824                         (unsigned)GC_gc_no, (void *)current_p,
825                         (unsigned long)descr);
826         }
827 #     endif
828       limit = current_p + (word)descr;
829     }
830     /* The simple case in which we're scanning a range. */
831     GC_ASSERT(!((word)current_p & (ALIGNMENT-1)));
832     credit -= limit - current_p;
833     limit -= sizeof(word);
834     {
835 #     define PREF_DIST 4
836 
837 #     ifndef SMALL_CONFIG
838         word deferred;
839 
840         /* Try to prefetch the next pointer to be examined ASAP.        */
841         /* Empirically, this also seems to help slightly without        */
842         /* prefetches, at least on linux/X86.  Presumably this loop     */
843         /* ends up with less register pressure, and gcc thus ends up    */
844         /* generating slightly better code.  Overall gcc code quality   */
845         /* for this loop is still not great.                            */
846         for(;;) {
847           PREFETCH(limit - PREF_DIST*CACHE_LINE_SIZE);
848           GC_ASSERT((word)limit >= (word)current_p);
849           deferred = *(word *)limit;
850           FIXUP_POINTER(deferred);
851           limit -= ALIGNMENT;
852           if (deferred >= (word)least_ha && deferred < (word)greatest_ha) {
853             PREFETCH((ptr_t)deferred);
854             break;
855           }
856           if ((word)current_p > (word)limit) goto next_object;
857           /* Unroll once, so we don't do too many of the prefetches     */
858           /* based on limit.                                            */
859           deferred = *(word *)limit;
860           FIXUP_POINTER(deferred);
861           limit -= ALIGNMENT;
862           if (deferred >= (word)least_ha && deferred < (word)greatest_ha) {
863             PREFETCH((ptr_t)deferred);
864             break;
865           }
866           if ((word)current_p > (word)limit) goto next_object;
867         }
868 #     endif
869 
870       while ((word)current_p <= (word)limit) {
871         /* Empirically, unrolling this loop doesn't help a lot. */
872         /* Since PUSH_CONTENTS expands to a lot of code,        */
873         /* we don't.                                            */
874         current = *(word *)current_p;
875         FIXUP_POINTER(current);
876         PREFETCH(current_p + PREF_DIST*CACHE_LINE_SIZE);
877         if (current >= (word)least_ha && current < (word)greatest_ha) {
878           /* Prefetch the contents of the object we just pushed.  It's  */
879           /* likely we will need them soon.                             */
880           PREFETCH((ptr_t)current);
881 #         ifdef ENABLE_TRACE
882             if (GC_trace_addr == current_p) {
883               GC_log_printf("GC #%u: considering(1) %p -> %p\n",
884                             (unsigned)GC_gc_no, (void *)current_p,
885                             (void *)current);
886             }
887 #         endif /* ENABLE_TRACE */
888           PUSH_CONTENTS((ptr_t)current, mark_stack_top,
889                         mark_stack_limit, current_p);
890         }
891         current_p += ALIGNMENT;
892       }
893 
894 #     ifndef SMALL_CONFIG
895         /* We still need to mark the entry we previously prefetched.    */
896         /* We already know that it passes the preliminary pointer       */
897         /* validity test.                                               */
898 #       ifdef ENABLE_TRACE
899             if (GC_trace_addr == current_p) {
900               GC_log_printf("GC #%u: considering(2) %p -> %p\n",
901                             (unsigned)GC_gc_no, (void *)current_p,
902                             (void *)deferred);
903             }
904 #       endif /* ENABLE_TRACE */
905         PUSH_CONTENTS((ptr_t)deferred, mark_stack_top,
906                       mark_stack_limit, current_p);
907         next_object:;
908 #     endif
909     }
910   }
911   return mark_stack_top;
912 }
913 
914 #ifdef PARALLEL_MARK
915 
916 STATIC GC_bool GC_help_wanted = FALSE;  /* Protected by mark lock       */
917 STATIC unsigned GC_helper_count = 0;    /* Number of running helpers.   */
918                                         /* Protected by mark lock       */
919 STATIC unsigned GC_active_count = 0;    /* Number of active helpers.    */
920                                         /* Protected by mark lock       */
921                                         /* May increase and decrease    */
922                                         /* within each mark cycle.  But */
923                                         /* once it returns to 0, it     */
924                                         /* stays zero for the cycle.    */
925 
926 GC_INNER word GC_mark_no = 0;
927 
928 static mse *main_local_mark_stack;
929 
930 #ifdef LINT2
931 # define LOCAL_MARK_STACK_SIZE (HBLKSIZE / 8)
932 #else
933 # define LOCAL_MARK_STACK_SIZE HBLKSIZE
934         /* Under normal circumstances, this is big enough to guarantee  */
935         /* we don't overflow half of it in a single call to             */
936         /* GC_mark_from.                                                */
937 #endif
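
/* With the common 4 KiB HBLKSIZE and a two-word mse, a local mark      */
/* stack is thus roughly 64 KiB per marker on a typical 64-bit target   */
/* (more if HBLKSIZE is configured larger); illustrative figures only.  */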
938 
/* Wait for all markers to finish initialization (i.e. store    */
940 /* marker_[b]sp, marker_mach_threads, GC_marker_Id).            */
GC_INNER void GC_wait_for_markers_init(void)
942 {
943   signed_word count;
944 
945   if (GC_markers_m1 == 0)
946     return;
947 
948   /* Allocate the local mark stack for the thread that holds GC lock.   */
949 # ifndef CAN_HANDLE_FORK
950     GC_ASSERT(NULL == main_local_mark_stack);
951 # else
952     if (NULL == main_local_mark_stack)
953 # endif
954   {
955     size_t bytes_to_get =
956                 ROUNDUP_PAGESIZE_IF_MMAP(LOCAL_MARK_STACK_SIZE * sizeof(mse));
957     main_local_mark_stack = (mse *)GET_MEM(bytes_to_get);
958     if (NULL == main_local_mark_stack)
959       ABORT("Insufficient memory for main local_mark_stack");
960     GC_add_to_our_memory((ptr_t)main_local_mark_stack, bytes_to_get);
961   }
962 
963   /* Reuse marker lock and builders count to synchronize        */
964   /* marker threads startup.                                    */
965   GC_acquire_mark_lock();
966   GC_fl_builder_count += GC_markers_m1;
967   count = GC_fl_builder_count;
968   GC_release_mark_lock();
969   if (count != 0) {
970     GC_ASSERT(count > 0);
971     GC_wait_for_reclaim();
972   }
973 }
974 
975 /* Steal mark stack entries starting at mse low into mark stack local   */
976 /* until we either steal mse high, or we have max entries.              */
977 /* Return a pointer to the top of the local mark stack.                 */
978 /* *next is replaced by a pointer to the next unscanned mark stack      */
979 /* entry.                                                               */
STATIC mse * GC_steal_mark_stack(mse * low, mse * high, mse * local,
981                                  unsigned max, mse **next)
982 {
983     mse *p;
984     mse *top = local - 1;
985     unsigned i = 0;
986 
987     GC_ASSERT((word)high >= (word)(low - 1)
988               && (word)(high - low + 1) <= GC_mark_stack_size);
989     for (p = low; (word)p <= (word)high && i <= max; ++p) {
990         word descr = (word)AO_load(&p->mse_descr.ao);
991         if (descr != 0) {
992             /* Must be ordered after read of descr: */
993             AO_store_release_write(&p->mse_descr.ao, 0);
994             /* More than one thread may get this entry, but that's only */
995             /* a minor performance problem.                             */
996             ++top;
997             top -> mse_descr.w = descr;
998             top -> mse_start = p -> mse_start;
999             GC_ASSERT((descr & GC_DS_TAGS) != GC_DS_LENGTH
1000                       || descr < (word)GC_greatest_plausible_heap_addr
1001                                         - (word)GC_least_plausible_heap_addr
1002                       || (word)(p->mse_start + descr)
1003                             <= (word)GC_least_plausible_heap_addr
1004                       || (word)p->mse_start
1005                             >= (word)GC_greatest_plausible_heap_addr);
1006             /* If this is a big object, count it as                     */
1007             /* size/256 + 1 objects.                                    */
1008             ++i;
1009             if ((descr & GC_DS_TAGS) == GC_DS_LENGTH) i += (int)(descr >> 8);
1010         }
1011     }
1012     *next = p;
1013     return top;
1014 }
1015 
1016 /* Copy back a local mark stack.        */
1017 /* low and high are inclusive bounds.   */
STATIC void GC_return_mark_stack(mse * low, mse * high)
1019 {
1020     mse * my_top;
1021     mse * my_start;
1022     size_t stack_size;
1023 
1024     if ((word)high < (word)low) return;
1025     stack_size = high - low + 1;
1026     GC_acquire_mark_lock();
1027     my_top = GC_mark_stack_top; /* Concurrent modification impossible. */
1028     my_start = my_top + 1;
1029     if ((word)(my_start - GC_mark_stack + stack_size)
1030                 > (word)GC_mark_stack_size) {
1031       GC_COND_LOG_PRINTF("No room to copy back mark stack\n");
1032       GC_mark_state = MS_INVALID;
1033       GC_mark_stack_too_small = TRUE;
1034       /* We drop the local mark stack.  We'll fix things later. */
1035     } else {
1036       BCOPY(low, my_start, stack_size * sizeof(mse));
1037       GC_ASSERT((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
1038                 == my_top);
1039       AO_store_release_write((volatile AO_t *)(&GC_mark_stack_top),
1040                              (AO_t)(my_top + stack_size));
1041                 /* Ensures visibility of previously written stack contents. */
1042     }
1043     GC_release_mark_lock();
1044     GC_notify_all_marker();
1045 }
1046 
1047 #ifndef N_LOCAL_ITERS
1048 # define N_LOCAL_ITERS 1
1049 #endif
1050 
1051 /* This function is only called when the local  */
1052 /* and the main mark stacks are both empty.     */
static GC_bool has_inactive_helpers(void)
1054 {
1055   GC_bool res;
1056 
1057   GC_acquire_mark_lock();
1058   res = GC_active_count < GC_helper_count;
1059   GC_release_mark_lock();
1060   return res;
1061 }
1062 
1063 /* Mark from the local mark stack.              */
1064 /* On return, the local mark stack is empty.    */
1065 /* But this may be achieved by copying the      */
1066 /* local mark stack back into the global one.   */
1067 /* We do not hold the mark lock.                */
STATIC void GC_do_local_mark(mse *local_mark_stack, mse *local_top)
1069 {
1070     unsigned n;
1071 
1072     for (;;) {
1073         for (n = 0; n < N_LOCAL_ITERS; ++n) {
1074             local_top = GC_mark_from(local_top, local_mark_stack,
1075                                      local_mark_stack + LOCAL_MARK_STACK_SIZE);
1076             if ((word)local_top < (word)local_mark_stack) return;
1077             if ((word)(local_top - local_mark_stack)
1078                         >= LOCAL_MARK_STACK_SIZE / 2) {
1079                 GC_return_mark_stack(local_mark_stack, local_top);
1080                 return;
1081             }
1082         }
1083         if ((word)AO_load((volatile AO_t *)&GC_mark_stack_top)
1084             < (word)AO_load(&GC_first_nonempty)
1085             && (word)local_top > (word)(local_mark_stack + 1)
1086             && has_inactive_helpers()) {
1087             /* Try to share the load, since the main stack is empty,    */
1088             /* and helper threads are waiting for a refill.             */
1089             /* The entries near the bottom of the stack are likely      */
1090             /* to require more work.  Thus we return those, even though */
1091             /* it's harder.                                             */
1092             mse * new_bottom = local_mark_stack
1093                                 + (local_top - local_mark_stack)/2;
1094             GC_ASSERT((word)new_bottom > (word)local_mark_stack
1095                       && (word)new_bottom < (word)local_top);
1096             GC_return_mark_stack(local_mark_stack, new_bottom - 1);
1097             memmove(local_mark_stack, new_bottom,
1098                     (local_top - new_bottom + 1) * sizeof(mse));
1099             local_top -= (new_bottom - local_mark_stack);
1100         }
1101     }
1102 }
1103 
1104 #ifndef ENTRIES_TO_GET
1105 # define ENTRIES_TO_GET 5
1106 #endif
1107 
1108 /* Mark using the local mark stack until the global mark stack is empty */
1109 /* and there are no active workers. Update GC_first_nonempty to reflect */
1110 /* progress.  Caller holds the mark lock.                               */
1111 /* Caller has already incremented GC_helper_count.  We decrement it,    */
1112 /* and maintain GC_active_count.                                        */
STATIC void GC_mark_local(mse *local_mark_stack, int id)
1114 {
1115     mse * my_first_nonempty;
1116 
1117     GC_active_count++;
1118     my_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
1119     GC_ASSERT((word)GC_mark_stack <= (word)my_first_nonempty);
1120     GC_ASSERT((word)my_first_nonempty
1121         <= (word)AO_load((volatile AO_t *)&GC_mark_stack_top) + sizeof(mse));
1122     GC_VERBOSE_LOG_PRINTF("Starting mark helper %d\n", id);
1123     GC_release_mark_lock();
1124     for (;;) {
1125         size_t n_on_stack;
1126         unsigned n_to_get;
1127         mse * my_top;
1128         mse * local_top;
1129         mse * global_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
1130 
1131         GC_ASSERT((word)my_first_nonempty >= (word)GC_mark_stack &&
1132                   (word)my_first_nonempty <=
1133                         (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
1134                         + sizeof(mse));
1135         GC_ASSERT((word)global_first_nonempty >= (word)GC_mark_stack);
1136         if ((word)my_first_nonempty < (word)global_first_nonempty) {
1137             my_first_nonempty = global_first_nonempty;
1138         } else if ((word)global_first_nonempty < (word)my_first_nonempty) {
1139             (void)AO_compare_and_swap(&GC_first_nonempty,
1140                                       (AO_t)global_first_nonempty,
1141                                       (AO_t)my_first_nonempty);
1142             /* If this fails, we just go ahead, without updating        */
1143             /* GC_first_nonempty.                                       */
1144         }
1145         /* Perhaps we should also update GC_first_nonempty, if it */
1146         /* is less.  But that would require using atomic updates. */
1147         my_top = (mse *)AO_load_acquire((volatile AO_t *)(&GC_mark_stack_top));
1148         if ((word)my_top < (word)my_first_nonempty) {
1149             GC_acquire_mark_lock();
1150             my_top = GC_mark_stack_top;
1151                 /* Asynchronous modification impossible here,   */
1152                 /* since we hold mark lock.                     */
1153             n_on_stack = my_top - my_first_nonempty + 1;
1154             if (0 == n_on_stack) {
1155                 GC_active_count--;
1156                 GC_ASSERT(GC_active_count <= GC_helper_count);
1157                 /* Other markers may redeposit objects  */
1158                 /* on the stack.                                */
1159                 if (0 == GC_active_count) GC_notify_all_marker();
1160                 while (GC_active_count > 0
1161                        && (word)AO_load(&GC_first_nonempty)
1162                                 > (word)GC_mark_stack_top) {
1163                     /* We will be notified if either GC_active_count    */
1164                     /* reaches zero, or if more objects are pushed on   */
1165                     /* the global mark stack.                           */
1166                     GC_wait_marker();
1167                 }
1168                 if (GC_active_count == 0
1169                     && (word)AO_load(&GC_first_nonempty)
1170                         > (word)GC_mark_stack_top) {
1171                     GC_bool need_to_notify = FALSE;
1172                     /* The above conditions can't be falsified while we */
1173                     /* hold the mark lock, since neither                */
1174                     /* GC_active_count nor GC_mark_stack_top can        */
1175                     /* change.  GC_first_nonempty can only be           */
1176                     /* incremented asynchronously.  Thus we know that   */
1177                     /* both conditions actually held simultaneously.    */
1178                     GC_helper_count--;
1179                     if (0 == GC_helper_count) need_to_notify = TRUE;
1180                     GC_VERBOSE_LOG_PRINTF("Finished mark helper %d\n", id);
1181                     if (need_to_notify) GC_notify_all_marker();
1182                     return;
1183                 }
1184                 /* else there's something on the stack again, or        */
1185                 /* another helper may push something.                   */
1186                 GC_active_count++;
1187                 GC_ASSERT(GC_active_count > 0);
1188                 GC_release_mark_lock();
1189                 continue;
1190             } else {
1191                 GC_release_mark_lock();
1192             }
1193         } else {
1194             n_on_stack = my_top - my_first_nonempty + 1;
1195         }
1196         n_to_get = ENTRIES_TO_GET;
1197         if (n_on_stack < 2 * ENTRIES_TO_GET) n_to_get = 1;
1198         local_top = GC_steal_mark_stack(my_first_nonempty, my_top,
1199                                         local_mark_stack, n_to_get,
1200                                         &my_first_nonempty);
1201         GC_ASSERT((word)my_first_nonempty >= (word)GC_mark_stack &&
1202                   (word)my_first_nonempty <=
1203                         (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
1204                         + sizeof(mse));
1205         GC_do_local_mark(local_mark_stack, local_top);
1206     }
1207 }
1208 
1209 /* Perform Parallel mark.                       */
1210 /* We hold the GC lock, not the mark lock.      */
1211 /* Currently runs until the mark stack is       */
1212 /* empty.                                       */
STATIC void GC_do_parallel_mark(void)
1214 {
1215     GC_acquire_mark_lock();
1216     GC_ASSERT(I_HOLD_LOCK());
1217     /* This could be a GC_ASSERT, but it seems safer to keep it on      */
1218     /* all the time, especially since it's cheap.                       */
1219     if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
1220         ABORT("Tried to start parallel mark in bad state");
1221     GC_VERBOSE_LOG_PRINTF("Starting marking for mark phase number %lu\n",
1222                           (unsigned long)GC_mark_no);
1223     GC_first_nonempty = (AO_t)GC_mark_stack;
1224     GC_active_count = 0;
1225     GC_helper_count = 1;
1226     GC_help_wanted = TRUE;
1227     GC_notify_all_marker();
1228         /* Wake up potential helpers.   */
1229     GC_mark_local(main_local_mark_stack, 0);
1230     GC_help_wanted = FALSE;
1231     /* Done; clean up.  */
1232     while (GC_helper_count > 0) {
1233       GC_wait_marker();
1234     }
1235     /* GC_helper_count cannot be incremented while GC_help_wanted == FALSE */
1236     GC_VERBOSE_LOG_PRINTF("Finished marking for mark phase number %lu\n",
1237                           (unsigned long)GC_mark_no);
1238     GC_mark_no++;
1239     GC_release_mark_lock();
1240     GC_notify_all_marker();
1241 }
1242 
1243 
1244 /* Try to help out the marker, if it's running.         */
1245 /* We do not hold the GC lock, but the requestor does.  */
1246 /* And we hold the mark lock.                           */
GC_help_marker(word my_mark_no)1247 GC_INNER void GC_help_marker(word my_mark_no)
1248 {
1249 #   define my_id my_id_mse.mse_descr.w
1250     mse my_id_mse;  /* align local_mark_stack explicitly */
1251     mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
1252                 /* Note: local_mark_stack is quite big (up to 128 KiB). */
1253 
1254     GC_ASSERT(GC_parallel);
1255     while (GC_mark_no < my_mark_no
1256            || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
1257       GC_wait_marker();
1258     }
1259     my_id = GC_helper_count;
1260     if (GC_mark_no != my_mark_no || my_id > (unsigned)GC_markers_m1) {
1261       /* Second test is useful only if original threads can also        */
1262       /* act as helpers.  Under Linux they can't.                       */
1263       return;
1264     }
1265     GC_helper_count = (unsigned)my_id + 1;
1266     GC_mark_local(local_mark_stack, (int)my_id);
1267     /* GC_mark_local decrements GC_helper_count. */
1268 #   undef my_id
1269 }
1270 
1271 #endif /* PARALLEL_MARK */
1272 
GC_scratch_recycle_inner(void * ptr,size_t bytes)1273 GC_INNER void GC_scratch_recycle_inner(void *ptr, size_t bytes)
1274 {
1275   if (ptr != NULL) {
1276     size_t page_offset = (word)ptr & (GC_page_size - 1);
1277     size_t displ = 0;
1278     size_t recycled_bytes;
1279 
1280     GC_ASSERT(bytes != 0);
1281     GC_ASSERT(GC_page_size != 0);
1282     /* TODO: Assert correct memory flags if GWW_VDB */
1283     if (page_offset != 0)
1284       displ = GC_page_size - page_offset;
1285     recycled_bytes = bytes > displ ? (bytes - displ) & ~(GC_page_size-1) : 0;
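    /* Illustrative example (assuming GC_page_size == 4096): for ptr at */
    /* page offset 256 and bytes == 10000, displ == 3840, so ptr + 3840 */
    /* is the first page-aligned address within the chunk, and          */
    /* recycled_bytes == (10000 - 3840) & ~4095 == 4096; exactly one    */
    /* whole page is returned to the heap while the unaligned head and  */
    /* tail are dropped.                                                */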
    GC_COND_LOG_PRINTF("Recycle %lu/%lu scratch-allocated bytes at %p\n",
                       (unsigned long)recycled_bytes, (unsigned long)bytes,
                       ptr);
    if (recycled_bytes > 0)
      GC_add_to_heap((struct hblk *)((word)ptr + displ), recycled_bytes);
  }
}

/* Allocate or reallocate space for mark stack of size n entries.  */
/* May silently fail.                                              */
static void alloc_mark_stack(size_t n)
{
    mse * new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
#   ifdef GWW_VDB
      /* Don't recycle a stack segment obtained with the wrong flags.   */
      /* Win32 GetWriteWatch requires the right kind of memory.         */
      static GC_bool GC_incremental_at_stack_alloc = FALSE;
      GC_bool recycle_old = !GC_auto_incremental
                            || GC_incremental_at_stack_alloc;

      GC_incremental_at_stack_alloc = GC_auto_incremental;
#   else
#     define recycle_old TRUE
#   endif

    GC_mark_stack_too_small = FALSE;
    if (GC_mark_stack != NULL) {
        if (new_stack != 0) {
          if (recycle_old) {
            /* Recycle old space */
            GC_scratch_recycle_inner(GC_mark_stack,
                        GC_mark_stack_size * sizeof(struct GC_ms_entry));
          }
          GC_mark_stack = new_stack;
          GC_mark_stack_size = n;
          /* FIXME: Do we need some way to reset GC_mark_stack_size?    */
          GC_mark_stack_limit = new_stack + n;
          GC_COND_LOG_PRINTF("Grew mark stack to %lu frames\n",
                             (unsigned long)GC_mark_stack_size);
        } else {
          WARN("Failed to grow mark stack to %" WARN_PRIdPTR " frames\n", n);
        }
    } else if (NULL == new_stack) {
        GC_err_printf("No space for mark stack\n");
        EXIT();
    } else {
        GC_mark_stack = new_stack;
        GC_mark_stack_size = n;
        GC_mark_stack_limit = new_stack + n;
    }
    GC_mark_stack_top = GC_mark_stack-1;
}

GC_INNER void GC_mark_init(void)
{
    alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
}

/*
 * Push all locations between bottom and top onto the mark stack.
 * bottom is the first location to be checked. top is one past the last
 * location to be checked.
 * Should only be used if there is no possibility of mark stack
 * overflow.
 */
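/* For example (assuming ALIGNMENT == 8 on a 64-bit target), a call     */
/* with bottom == 0x1003 and top == 0x1025 covers the aligned range     */
/* [0x1008, 0x1020): bottom is rounded up, top is rounded down, and     */
/* the resulting length (0x18 here) is recorded as a GC_DS_LENGTH       */
/* descriptor in the new mark stack entry.                              */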
GC_API void GC_CALL GC_push_all(void *bottom, void *top)
{
    word length;

    bottom = (void *)(((word)bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (void *)((word)top & ~(ALIGNMENT-1));
    if ((word)bottom >= (word)top) return;

    GC_mark_stack_top++;
    if ((word)GC_mark_stack_top >= (word)GC_mark_stack_limit) {
        ABORT("Unexpected mark stack overflow");
    }
    length = (word)top - (word)bottom;
#   if GC_DS_TAGS > ALIGNMENT - 1
        length += GC_DS_TAGS;
        length &= ~GC_DS_TAGS;
#   endif
    GC_mark_stack_top -> mse_start = (ptr_t)bottom;
    GC_mark_stack_top -> mse_descr.w = length;
}

#ifndef GC_DISABLE_INCREMENTAL

  /* Analogous to the above, but push only those pages h with           */
  /* dirty_fn(h) != 0.  We use GC_push_all to actually push the block.  */
  /* Used both to selectively push dirty pages, and to push a block in  */
  /* piecemeal fashion, to allow for more marking concurrency.          */
  /* Will not overflow mark stack if GC_push_all pushes a small fixed   */
  /* number of entries.  (This is invoked only if GC_push_all pushes    */
  /* a single entry, or if it marks each object before pushing it, thus */
  /* ensuring progress in the event of a stack overflow.)               */
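  /* Concretely, for a range spanning several heap blocks: the partial  */
  /* block [bottom, h) is pushed if dirty, then each whole block        */
  /* [h, h+1) that dirty_fn reports as dirty, then the trailing partial */
  /* block [h, top); whenever the mark stack is more than 3/4 full the  */
  /* remainder of the range is pushed as a single entry instead, to     */
  /* limit further growth.                                              */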
  STATIC void GC_push_selected(ptr_t bottom, ptr_t top,
                               GC_bool (*dirty_fn)(struct hblk *))
  {
    struct hblk * h;

    bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));
    if ((word)bottom >= (word)top) return;

    h = HBLKPTR(bottom + HBLKSIZE);
    if ((word)top <= (word)h) {
        if ((*dirty_fn)(h-1)) {
            GC_push_all(bottom, top);
        }
        return;
    }
    if ((*dirty_fn)(h-1)) {
        if ((word)(GC_mark_stack_top - GC_mark_stack)
            > 3 * GC_mark_stack_size / 4) {
            GC_push_all(bottom, top);
            return;
        }
        GC_push_all(bottom, h);
    }

    while ((word)(h+1) <= (word)top) {
        if ((*dirty_fn)(h)) {
            if ((word)(GC_mark_stack_top - GC_mark_stack)
                > 3 * GC_mark_stack_size / 4) {
                /* Danger of mark stack overflow */
                GC_push_all(h, top);
                return;
            } else {
                GC_push_all(h, h + 1);
            }
        }
        h++;
    }

    if ((ptr_t)h != top && (*dirty_fn)(h)) {
       GC_push_all(h, top);
    }
  }

  GC_API void GC_CALL GC_push_conditional(void *bottom, void *top, int all)
  {
    if (!all) {
      GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_dirty);
    } else {
#     ifdef PROC_VDB
        if (GC_auto_incremental) {
          /* Pages that were never dirtied cannot contain pointers.     */
          GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_ever_dirty);
        } else
#     endif
      /* else */ {
        GC_push_all(bottom, top);
      }
    }
  }
#else
  GC_API void GC_CALL GC_push_conditional(void *bottom, void *top,
                                          int all GC_ATTR_UNUSED)
  {
    GC_push_all(bottom, top);
  }
#endif /* GC_DISABLE_INCREMENTAL */

#if defined(MSWIN32) || defined(MSWINCE)
  void __cdecl GC_push_one(word p)
#else
  void GC_push_one(word p)
#endif
{
    GC_PUSH_ONE_STACK(p, MARKED_FROM_REGISTER);
}

GC_API struct GC_ms_entry * GC_CALL GC_mark_and_push(void *obj,
                                                mse *mark_stack_ptr,
                                                mse *mark_stack_limit,
                                                void ** src GC_ATTR_UNUSED)
{
    hdr * hhdr;

    PREFETCH(obj);
    GET_HDR(obj, hhdr);
    if ((EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)
         && (!GC_all_interior_pointers
             || NULL == (hhdr = GC_find_header((ptr_t)GC_base(obj)))))
        || EXPECT(HBLK_IS_FREE(hhdr), FALSE)) {
      GC_ADD_TO_BLACK_LIST_NORMAL(obj, (ptr_t)src);
      return mark_stack_ptr;
    }
    return GC_push_contents_hdr((ptr_t)obj, mark_stack_ptr, mark_stack_limit,
                                (ptr_t)src, hhdr, TRUE);
}

/* Mark and push (i.e. gray) a single object p onto the main    */
/* mark stack.  Consider p to be valid if it is an interior     */
/* pointer.                                                     */
/* The object p has passed a preliminary pointer validity       */
/* test, but we do not definitely know whether it is valid.     */
/* Mark bits are NOT atomically updated.  Thus this must be the */
/* only thread setting them.                                    */
# if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
    GC_INNER void GC_mark_and_push_stack(ptr_t p, ptr_t source)
# else
    GC_INNER void GC_mark_and_push_stack(ptr_t p)
#   define source ((ptr_t)0)
# endif
{
    hdr * hhdr;
    ptr_t r = p;

    PREFETCH(p);
    GET_HDR(p, hhdr);
    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)
        && (NULL == hhdr
            || (r = (ptr_t)GC_base(p)) == NULL
            || (hhdr = HDR(r)) == NULL)) {
        GC_ADD_TO_BLACK_LIST_STACK(p, source);
        return;
    }
    if (EXPECT(HBLK_IS_FREE(hhdr), FALSE)) {
        GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
        return;
    }
#   ifdef THREADS
      /* Pointer is on the stack.  We may have dirtied the object       */
      /* it points to, but have not called GC_dirty yet.                */
      GC_dirty(p); /* entire object */
#   endif
    GC_mark_stack_top = GC_push_contents_hdr(r, GC_mark_stack_top,
                                             GC_mark_stack_limit,
                                             source, hhdr, FALSE);
    /* We silently ignore pointers to near the end of a block,  */
    /* which is very mildly suboptimal.                         */
    /* FIXME: We should probably add a header word to address   */
    /* this.                                                    */
}
# undef source

#ifdef TRACE_BUF

# ifndef TRACE_ENTRIES
#   define TRACE_ENTRIES 1000
# endif

struct trace_entry {
    char * kind;
    word gc_no;
    word bytes_allocd;
    word arg1;
    word arg2;
} GC_trace_buf[TRACE_ENTRIES] = { { NULL, 0, 0, 0, 0 } };

int GC_trace_buf_ptr = 0;

void GC_add_trace_entry(char *kind, word arg1, word arg2)
{
    GC_trace_buf[GC_trace_buf_ptr].kind = kind;
    GC_trace_buf[GC_trace_buf_ptr].gc_no = GC_gc_no;
    GC_trace_buf[GC_trace_buf_ptr].bytes_allocd = GC_bytes_allocd;
    GC_trace_buf[GC_trace_buf_ptr].arg1 = arg1 ^ 0x80000000;
    GC_trace_buf[GC_trace_buf_ptr].arg2 = arg2 ^ 0x80000000;
    GC_trace_buf_ptr++;
    if (GC_trace_buf_ptr >= TRACE_ENTRIES) GC_trace_buf_ptr = 0;
}

GC_API void GC_CALL GC_print_trace_inner(word gc_no)
{
    int i;

    for (i = GC_trace_buf_ptr-1; i != GC_trace_buf_ptr; i--) {
        struct trace_entry *p;

        if (i < 0) i = TRACE_ENTRIES-1;
        p = GC_trace_buf + i;
        if (p -> gc_no < gc_no || p -> kind == 0) {
            return;
        }
        GC_printf("Trace:%s (gc:%u, bytes:%lu) 0x%lX, 0x%lX\n",
                  p -> kind, (unsigned)p -> gc_no,
                  (unsigned long)p -> bytes_allocd,
                  (long)p->arg1 ^ 0x80000000L, (long)p->arg2 ^ 0x80000000L);
    }
    GC_printf("Trace incomplete\n");
}

GC_API void GC_CALL GC_print_trace(word gc_no)
{
    DCL_LOCK_STATE;

    LOCK();
    GC_print_trace_inner(gc_no);
    UNLOCK();
}

#endif /* TRACE_BUF */

/*
 * A version of GC_push_all that treats all interior pointers as valid
 * and scans the entire region immediately, in case the contents
 * change.
 */
GC_ATTR_NO_SANITIZE_ADDR GC_ATTR_NO_SANITIZE_MEMORY GC_ATTR_NO_SANITIZE_THREAD
GC_API void GC_CALL GC_push_all_eager(void *bottom, void *top)
{
    word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
    REGISTER word *p;
    REGISTER word *lim;
    REGISTER ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    REGISTER ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    if (top == 0) return;
    /* check all pointers in range and push if they appear      */
    /* to be valid.                                             */
      lim = t - 1 /* longword */;
      for (p = b; (word)p <= (word)lim;
           p = (word *)(((ptr_t)p) + ALIGNMENT)) {
        REGISTER word q = *p;

        GC_PUSH_ONE_STACK(q, p);
      }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
}

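/* Push a stack section.  A reader's summary of the policy below: the   */
/* range is pushed lazily via GC_push_all only if all interior          */
/* pointers are treated as valid and the mark stack still has ample     */
/* headroom (GC_push_all aborts on overflow); otherwise the range is    */
/* scanned eagerly, right away, by GC_push_all_eager.                   */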
GC_INNER void GC_push_all_stack(ptr_t bottom, ptr_t top)
{
#   ifndef NEED_FIXUP_POINTER
      if (GC_all_interior_pointers
#         if defined(THREADS) && defined(MPROTECT_VDB)
            && !GC_auto_incremental
#         endif
          && (word)GC_mark_stack_top
             < (word)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/8)) {
        GC_push_all(bottom, top);
      } else
#   endif
    /* else */ {
      GC_push_all_eager(bottom, top);
    }
}

#if defined(WRAP_MARK_SOME) && defined(PARALLEL_MARK)
  /* Similar to GC_push_conditional but scans the whole region immediately. */
  GC_ATTR_NO_SANITIZE_ADDR GC_ATTR_NO_SANITIZE_MEMORY
  GC_ATTR_NO_SANITIZE_THREAD
  GC_INNER void GC_push_conditional_eager(void *bottom, void *top,
                                          GC_bool all)
  {
    word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
    REGISTER word *p;
    REGISTER word *lim;
    REGISTER ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    REGISTER ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    if (top == NULL)
      return;
    (void)all; /* TODO: If !all then scan only dirty pages. */

    lim = t - 1;
    for (p = b; (word)p <= (word)lim; p = (word *)((ptr_t)p + ALIGNMENT)) {
      REGISTER word q = *p;

      GC_PUSH_ONE_HEAP(q, p, GC_mark_stack_top);
    }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
  }
#endif /* WRAP_MARK_SOME && PARALLEL_MARK */

#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
    defined(MARK_BIT_PER_GRANULE)
# if GC_GRANULE_WORDS == 1
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
                do { \
                  word qcontents = (q)[0]; \
                  GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
                } while (0)
# elif GC_GRANULE_WORDS == 2
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
                do { \
                  word qcontents = (q)[0]; \
                  GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
                  qcontents = (q)[1]; \
                  GC_PUSH_ONE_HEAP(qcontents, (q)+1, GC_mark_stack_top); \
                } while (0)
# elif GC_GRANULE_WORDS == 4
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
                do { \
                  word qcontents = (q)[0]; \
                  GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
                  qcontents = (q)[1]; \
                  GC_PUSH_ONE_HEAP(qcontents, (q)+1, GC_mark_stack_top); \
                  qcontents = (q)[2]; \
                  GC_PUSH_ONE_HEAP(qcontents, (q)+2, GC_mark_stack_top); \
                  qcontents = (q)[3]; \
                  GC_PUSH_ONE_HEAP(qcontents, (q)+3, GC_mark_stack_top); \
                } while (0)
# endif
#endif /* !USE_MARK_BYTES && MARK_BIT_PER_GRANULE */

#ifdef USE_PUSH_MARKED_ACCELERATORS
/* Push all objects reachable from marked objects in the given block */
/* containing objects of size 1 granule.                             */
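/* For instance, a mark word of 0x5 (binary 101) causes PUSH_GRANULE */
/* to be applied to granules 0 and 2 of the corresponding group;     */
/* granule 1 is skipped, and the inner loop exits as soon as the     */
/* remaining mark bits are all zero.                                 */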
STATIC void GC_push_marked1(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;

    /* Allow registers to be used for some frequently accessed  */
    /* global variables.  Otherwise aliasing issues are likely  */
    /* to prevent that.                                         */
    ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
        while ((word)p < (word)plim) {
            word mark_word = *mark_word_addr++;
            word *q = p;

            while(mark_word != 0) {
              if (mark_word & 1) {
                  PUSH_GRANULE(q);
              }
              q += GC_GRANULE_WORDS;
              mark_word >>= 1;
            }
            p += WORDSZ*GC_GRANULE_WORDS;
        }

#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_limit GC_arrays._mark_stack_limit
#   define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}


#ifndef UNALIGNED_PTRS

/* Push all objects reachable from marked objects in the given block */
/* containing objects of size 2 granules.                            */
STATIC void GC_push_marked2(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;

    ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
        while ((word)p < (word)plim) {
            word mark_word = *mark_word_addr++;
            word *q = p;

            while(mark_word != 0) {
              if (mark_word & 1) {
                  PUSH_GRANULE(q);
                  PUSH_GRANULE(q + GC_GRANULE_WORDS);
              }
              q += 2 * GC_GRANULE_WORDS;
              mark_word >>= 2;
            }
            p += WORDSZ*GC_GRANULE_WORDS;
        }

#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_limit GC_arrays._mark_stack_limit
#   define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}

# if GC_GRANULE_WORDS < 4
/* Push all objects reachable from marked objects in the given block */
/* containing objects of size 4 granules.                            */
/* There is a risk of mark stack overflow here.  But we handle that. */
/* And only unmarked objects get pushed, so it's not very likely.    */
STATIC void GC_push_marked4(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;

    ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
        while ((word)p < (word)plim) {
            word mark_word = *mark_word_addr++;
            word *q = p;

            while(mark_word != 0) {
              if (mark_word & 1) {
                  PUSH_GRANULE(q);
                  PUSH_GRANULE(q + GC_GRANULE_WORDS);
                  PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
                  PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
              }
              q += 4 * GC_GRANULE_WORDS;
              mark_word >>= 4;
            }
            p += WORDSZ*GC_GRANULE_WORDS;
        }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_limit GC_arrays._mark_stack_limit
#   define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}

#endif /* GC_GRANULE_WORDS < 4 */

#endif /* UNALIGNED_PTRS */

#endif /* USE_PUSH_MARKED_ACCELERATORS */

/* Push all objects reachable from marked objects in the given block */
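/* (The scan limit lim below is the start of the last object that    */
/* fits entirely in the block, or h -> hb_body for oversized objects */
/* so that exactly one object is examined; marked objects are then   */
/* pushed at stride sz while the mark-bit index advances by          */
/* MARK_BIT_OFFSET(sz) per object.)                                  */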
STATIC void GC_push_marked(struct hblk *h, hdr *hhdr)
{
    word sz = hhdr -> hb_sz;
    word descr = hhdr -> hb_descr;
    ptr_t p;
    word bit_no;
    ptr_t lim;
    mse * GC_mark_stack_top_reg;
    mse * mark_stack_limit = GC_mark_stack_limit;

    /* Some quick shortcuts: */
        if ((/* 0 | */ GC_DS_LENGTH) == descr) return;
        if (GC_block_empty(hhdr)/* nothing marked */) return;
#   if !defined(GC_DISABLE_INCREMENTAL)
      GC_n_rescuing_pages++;
#   endif
    GC_objects_are_marked = TRUE;
    if (sz > MAXOBJBYTES) {
        lim = h -> hb_body;
    } else {
        lim = (ptr_t)((word)(h + 1)->hb_body - sz);
    }

    switch(BYTES_TO_GRANULES(sz)) {
#   if defined(USE_PUSH_MARKED_ACCELERATORS)
     case 1:
       GC_push_marked1(h, hhdr);
       break;
#    if !defined(UNALIGNED_PTRS)
       case 2:
         GC_push_marked2(h, hhdr);
         break;
#     if GC_GRANULE_WORDS < 4
       case 4:
         GC_push_marked4(h, hhdr);
         break;
#     endif
#    endif
#   else
     case 1: /* to suppress "switch statement contains no case" warning */
#   endif
     default:
      GC_mark_stack_top_reg = GC_mark_stack_top;
      for (p = h -> hb_body, bit_no = 0; (word)p <= (word)lim;
           p += sz, bit_no += MARK_BIT_OFFSET(sz)) {
        if (mark_bit_from_hdr(hhdr, bit_no)) {
          /* Mark from fields inside the object. */
          GC_mark_stack_top_reg = GC_push_obj(p, hhdr, GC_mark_stack_top_reg,
                                              mark_stack_limit);
        }
      }
      GC_mark_stack_top = GC_mark_stack_top_reg;
    }
}

#ifdef ENABLE_DISCLAIM
/* Unconditionally mark from all objects which have not been reclaimed. */
/* This is useful in order to retain pointers which are reachable from  */
/* the disclaim notifiers.                                              */
/*                                                                      */
/* To determine whether an object has been reclaimed, we require that   */
/* any live object has a non-zero value in one of the two lowest bits   */
/* of the first word.  On the other hand, a reclaimed object is a       */
/* member of a free list, and thus contains a word-aligned next pointer */
/* as its first word.                                                   */
 STATIC void GC_push_unconditionally(struct hblk *h, hdr *hhdr)
 {
    word sz = hhdr -> hb_sz;
    word descr = hhdr -> hb_descr;
    ptr_t p;
    ptr_t lim;
    mse * GC_mark_stack_top_reg;
    mse * mark_stack_limit = GC_mark_stack_limit;

    if ((/* 0 | */ GC_DS_LENGTH) == descr)
        return;

#   if !defined(GC_DISABLE_INCREMENTAL)
      GC_n_rescuing_pages++;
#   endif
    GC_objects_are_marked = TRUE;
    if (sz > MAXOBJBYTES)
        lim = h -> hb_body;
    else
        lim = (ptr_t)((word)(h + 1)->hb_body - sz);

    GC_mark_stack_top_reg = GC_mark_stack_top;
    for (p = h -> hb_body; (word)p <= (word)lim; p += sz)
      if ((*(word *)p & 0x3) != 0)
        GC_mark_stack_top_reg = GC_push_obj(p, hhdr, GC_mark_stack_top_reg,
                                            mark_stack_limit);
    GC_mark_stack_top = GC_mark_stack_top_reg;
  }
#endif /* ENABLE_DISCLAIM */

#ifndef GC_DISABLE_INCREMENTAL
  /* Test whether any page in the given block is dirty.   */
  STATIC GC_bool GC_block_was_dirty(struct hblk *h, hdr *hhdr)
  {
    word sz = hhdr -> hb_sz;

    if (sz <= MAXOBJBYTES) {
         return(GC_page_was_dirty(h));
    } else {
         ptr_t p = (ptr_t)h;
         while ((word)p < (word)h + sz) {
             if (GC_page_was_dirty((struct hblk *)p)) return(TRUE);
             p += HBLKSIZE;
         }
         return(FALSE);
    }
  }
#endif /* GC_DISABLE_INCREMENTAL */

/* Similar to GC_push_marked, but skip over unallocated blocks  */
/* and return address of next plausible block.                  */
STATIC struct hblk * GC_push_next_marked(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr), FALSE)) {
      h = GC_next_block(h, FALSE);
      if (NULL == h) return NULL;
      hhdr = GC_find_header((ptr_t)h);
    } else {
#     ifdef LINT2
        if (NULL == h) ABORT("Bad HDR() definition");
#     endif
    }
    GC_push_marked(h, hhdr);
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}

#ifndef GC_DISABLE_INCREMENTAL
  /* Identical to above, but mark only from dirty pages   */
  STATIC struct hblk * GC_push_next_marked_dirty(struct hblk *h)
  {
    hdr * hhdr = HDR(h);

    if (!GC_incremental) ABORT("Dirty bits not set up");
    for (;;) {
        if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr)
                   || HBLK_IS_FREE(hhdr), FALSE)) {
          h = GC_next_block(h, FALSE);
          if (NULL == h) return NULL;
          hhdr = GC_find_header((ptr_t)h);
        } else {
#         ifdef LINT2
            if (NULL == h) ABORT("Bad HDR() definition");
#         endif
        }
        if (GC_block_was_dirty(h, hhdr))
          break;
        h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
        hhdr = HDR(h);
    }
#   ifdef ENABLE_DISCLAIM
      if ((hhdr -> hb_flags & MARK_UNCONDITIONALLY) != 0) {
        GC_push_unconditionally(h, hhdr);

        /* Then we may ask, why not also add the MARK_UNCONDITIONALLY   */
        /* case to GC_push_next_marked, which is also applied to        */
        /* uncollectible blocks?  But it seems to me that the function  */
        /* does not need to scan uncollectible (and unconditionally     */
        /* marked) blocks since those are already handled in the        */
        /* MS_PUSH_UNCOLLECTABLE phase.                                 */
      } else
#   endif
    /* else */ {
      GC_push_marked(h, hhdr);
    }
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
  }
#endif /* !GC_DISABLE_INCREMENTAL */

/* Similar to above, but for uncollectible pages.  Needed since we      */
/* do not clear marks for such pages, even for full collections.        */
STATIC struct hblk * GC_push_next_marked_uncollectable(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    for (;;) {
        if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr)
                   || HBLK_IS_FREE(hhdr), FALSE)) {
          h = GC_next_block(h, FALSE);
          if (NULL == h) return NULL;
          hhdr = GC_find_header((ptr_t)h);
        } else {
#         ifdef LINT2
            if (NULL == h) ABORT("Bad HDR() definition");
#         endif
        }
        if (hhdr -> hb_obj_kind == UNCOLLECTABLE) {
            GC_push_marked(h, hhdr);
            break;
        }
#       ifdef ENABLE_DISCLAIM
            if ((hhdr -> hb_flags & MARK_UNCONDITIONALLY) != 0) {
                GC_push_unconditionally(h, hhdr);
                break;
            }
#       endif
        h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
        hhdr = HDR(h);
    }
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}