1 /*
2  * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
3  * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
4  * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
5  *
6  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
7  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
8  *
9  * Permission is hereby granted to use or copy this program
10  * for any purpose,  provided the above notices are retained on all copies.
11  * Permission to modify the code and to distribute modified code is granted,
12  * provided the above notices are retained, and a notice that the code was
13  * modified is included with the above copyright notice.
14  *
15  */
16 
17 #include "private/gc_pmark.h"
18 
19 #ifndef MAX_MARKERS
20 # define MAX_MARKERS 16
21 #endif
22 
23 #include <stdio.h>
24 
25 #if defined(MSWIN32) && defined(__GNUC__)
26 # include <excpt.h>
27 #endif
28 
29 /* Make arguments appear live to compiler.  Put here to minimize the    */
30 /* risk of inlining.  Used to minimize junk left in registers.          */
31 void GC_noop6(word arg1 GC_ATTR_UNUSED, word arg2 GC_ATTR_UNUSED,
32               word arg3 GC_ATTR_UNUSED, word arg4 GC_ATTR_UNUSED,
33               word arg5 GC_ATTR_UNUSED, word arg6 GC_ATTR_UNUSED)
34 {
35   /* Empty */
36 }
37 
38 /* Single argument version, robust against whole program analysis. */
39 volatile word GC_noop_sink;
40 GC_API void GC_CALL GC_noop1(word x)
41 {
42     GC_noop_sink = x;
43 }
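/* Illustrative note: client code can keep a value visibly live across  */
/* an optimizing compiler by passing it here, e.g. GC_noop1((word)p).   */
/* The store to the volatile GC_noop_sink cannot be optimized away.     */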
44 
45 /* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */
46 
47 GC_INNER unsigned GC_n_mark_procs = GC_RESERVED_MARK_PROCS;
48 
49 /* Initialize GC_obj_kinds properly and standard free lists properly.   */
50 /* This must be done statically since they may be accessed before       */
51 /* GC_init is called.                                                   */
52 /* It's done here, since we need to deal with mark descriptors.         */
53 GC_INNER struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
54 /* PTRFREE */ { &GC_aobjfreelist[0], 0 /* filled in dynamically */,
55                 /* 0 | */ GC_DS_LENGTH, FALSE, FALSE
56                 /*, */ OK_DISCLAIM_INITZ },
57 /* NORMAL  */ { &GC_objfreelist[0], 0,
58                 /* 0 | */ GC_DS_LENGTH,
59                                 /* adjusted in GC_init for EXTRA_BYTES  */
60                 TRUE /* add length to descr */, TRUE
61                 /*, */ OK_DISCLAIM_INITZ },
62 /* UNCOLLECTABLE */
63               { &GC_uobjfreelist[0], 0,
64                 /* 0 | */ GC_DS_LENGTH, TRUE /* add length to descr */, TRUE
65                 /*, */ OK_DISCLAIM_INITZ },
66 # ifdef ATOMIC_UNCOLLECTABLE
67    /* AUNCOLLECTABLE */
68               { &GC_auobjfreelist[0], 0,
69                 /* 0 | */ GC_DS_LENGTH, FALSE /* add length to descr */, FALSE
70                 /*, */ OK_DISCLAIM_INITZ },
71 # endif
72 # ifdef STUBBORN_ALLOC
73 /*STUBBORN*/ { (void **)&GC_sobjfreelist[0], 0,
74                 /* 0 | */ GC_DS_LENGTH, TRUE /* add length to descr */, TRUE
75                 /*, */ OK_DISCLAIM_INITZ },
76 # endif
77 };
78 
79 # ifdef ATOMIC_UNCOLLECTABLE
80 #   ifdef STUBBORN_ALLOC
81 #     define GC_N_KINDS_INITIAL_VALUE 5
82 #   else
83 #     define GC_N_KINDS_INITIAL_VALUE 4
84 #   endif
85 # else
86 #   ifdef STUBBORN_ALLOC
87 #     define GC_N_KINDS_INITIAL_VALUE 4
88 #   else
89 #     define GC_N_KINDS_INITIAL_VALUE 3
90 #   endif
91 # endif
92 
93 GC_INNER unsigned GC_n_kinds = GC_N_KINDS_INITIAL_VALUE;
94 
95 # ifndef INITIAL_MARK_STACK_SIZE
96 #   define INITIAL_MARK_STACK_SIZE (1*HBLKSIZE)
97                 /* INITIAL_MARK_STACK_SIZE * sizeof(mse) should be a    */
98                 /* multiple of HBLKSIZE.                                */
99                 /* The incremental collector actually likes a larger    */
100                 /* size, since it wants to push all marked dirty objs   */
101                 /* before marking anything new.  Currently we let it    */
102                 /* grow dynamically.                                    */
103 # endif
104 
105 STATIC word GC_n_rescuing_pages = 0;
106                                 /* Number of dirty pages we marked from */
107                                 /* excludes ptrfree pages, etc.         */
108 
109 GC_INNER size_t GC_mark_stack_size = 0;
110 
111 #ifdef PARALLEL_MARK
112   STATIC volatile AO_t GC_first_nonempty = 0;
113         /* Lowest entry on mark stack   */
114         /* that may be nonempty.        */
115         /* Updated only by initiating   */
116         /* thread.                      */
117 #endif
118 
119 GC_INNER mark_state_t GC_mark_state = MS_NONE;
120 
121 GC_INNER GC_bool GC_mark_stack_too_small = FALSE;
122 
123 static struct hblk * scan_ptr;
124 
125 STATIC GC_bool GC_objects_are_marked = FALSE;
126                 /* Are there collectible marked objects in the heap?    */
127 
128 /* Is a collection in progress?  Note that this can return true in the  */
129 /* nonincremental case, if a collection has been abandoned and the      */
130 /* mark state is now MS_INVALID.                                        */
131 GC_INNER GC_bool GC_collection_in_progress(void)
132 {
133     return(GC_mark_state != MS_NONE);
134 }
135 
136 /* clear all mark bits in the header */
137 GC_INNER void GC_clear_hdr_marks(hdr *hhdr)
138 {
139     size_t last_bit = FINAL_MARK_BIT(hhdr -> hb_sz);
140     BZERO(hhdr -> hb_marks, sizeof(hhdr->hb_marks));
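    /* Note: the mark bit just past the last object in the block is     */
    /* deliberately set again below; it apparently acts as a sentinel   */
    /* so that scans over the mark bits always terminate.               */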
141     set_mark_bit_from_hdr(hhdr, last_bit);
142     hhdr -> hb_n_marks = 0;
143 }
144 
145 /* Set all mark bits in the header.  Used for uncollectible blocks. */
146 GC_INNER void GC_set_hdr_marks(hdr *hhdr)
147 {
148     unsigned i;
149     size_t sz = hhdr -> hb_sz;
150     unsigned n_marks = (unsigned)FINAL_MARK_BIT(sz);
151 
152 #   ifdef USE_MARK_BYTES
153       for (i = 0; i <= n_marks; i += (unsigned)MARK_BIT_OFFSET(sz)) {
154         hhdr -> hb_marks[i] = 1;
155       }
156 #   else
157       for (i = 0; i < divWORDSZ(n_marks + WORDSZ); ++i) {
158         hhdr -> hb_marks[i] = ONES;
159       }
160 #   endif
161 #   ifdef MARK_BIT_PER_OBJ
162       hhdr -> hb_n_marks = n_marks - 1;
163 #   else
164       hhdr -> hb_n_marks = HBLK_OBJS(sz);
165 #   endif
166 }
167 
168 /*
169  * Clear all mark bits associated with block h.
170  */
171 static void clear_marks_for_block(struct hblk *h, word dummy GC_ATTR_UNUSED)
172 {
173     register hdr * hhdr = HDR(h);
174 
175     if (IS_UNCOLLECTABLE(hhdr -> hb_obj_kind)) return;
176         /* Mark bit for these is cleared only once the object is        */
177         /* explicitly deallocated.  This either frees the block, or     */
178         /* the bit is cleared once the object is on the free list.      */
179     GC_clear_hdr_marks(hhdr);
180 }
181 
182 /* Slow but general routines for setting/clearing/asking about mark bits */
183 GC_API void GC_CALL GC_set_mark_bit(const void *p)
184 {
185     struct hblk *h = HBLKPTR(p);
186     hdr * hhdr = HDR(h);
187     word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);
188 
189     if (!mark_bit_from_hdr(hhdr, bit_no)) {
190       set_mark_bit_from_hdr(hhdr, bit_no);
191       ++hhdr -> hb_n_marks;
192     }
193 }
194 
195 GC_API void GC_CALL GC_clear_mark_bit(const void *p)
196 {
197     struct hblk *h = HBLKPTR(p);
198     hdr * hhdr = HDR(h);
199     word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);
200 
201     if (mark_bit_from_hdr(hhdr, bit_no)) {
202       size_t n_marks;
203       clear_mark_bit_from_hdr(hhdr, bit_no);
204       n_marks = hhdr -> hb_n_marks - 1;
205 #     ifdef PARALLEL_MARK
206         if (n_marks != 0 || !GC_parallel)
207           hhdr -> hb_n_marks = n_marks;
208         /* Don't decrement to zero.  The counts are approximate due to  */
209         /* concurrency issues, but we need to ensure that a count of    */
210         /* zero implies an empty block.                                 */
211 #     else
212           hhdr -> hb_n_marks = n_marks;
213 #     endif
214     }
215 }
216 
217 GC_API int GC_CALL GC_is_marked(const void *p)
218 {
219     struct hblk *h = HBLKPTR(p);
220     hdr * hhdr = HDR(h);
221     word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);
222 
223     return (int)mark_bit_from_hdr(hhdr, bit_no); /* 0 or 1 */
224 }
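/* Note: these three routines appear to expect p to be the base address */
/* of an object, since the bit index is computed directly from p's      */
/* displacement within its block.  Illustrative (hypothetical) usage:   */
/*     if (!GC_is_marked(p)) GC_set_mark_bit(p);                        */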
225 
226 /*
227  * Clear mark bits in all allocated heap blocks.  This invalidates
228  * the marker invariant, and sets GC_mark_state to reflect this.
229  * (This implicitly starts marking to reestablish the invariant.)
230  */
231 GC_INNER void GC_clear_marks(void)
232 {
233     GC_apply_to_all_blocks(clear_marks_for_block, (word)0);
234     GC_objects_are_marked = FALSE;
235     GC_mark_state = MS_INVALID;
236     scan_ptr = 0;
237 }
238 
239 #ifdef CHECKSUMS
240   void GC_check_dirty(void);
241 #endif
242 
243 /* Initiate a garbage collection.  Initiates a full collection if the   */
244 /* mark state is invalid.                                               */
245 GC_INNER void GC_initiate_gc(void)
246 {
247 #   ifndef GC_DISABLE_INCREMENTAL
248         if (GC_dirty_maintained) GC_read_dirty();
249 #   endif
250 #   ifdef STUBBORN_ALLOC
251         GC_read_changed();
252 #   endif
253 #   ifdef CHECKSUMS
254         if (GC_dirty_maintained) GC_check_dirty();
255 #   endif
256     GC_n_rescuing_pages = 0;
257     if (GC_mark_state == MS_NONE) {
258         GC_mark_state = MS_PUSH_RESCUERS;
259     } else if (GC_mark_state != MS_INVALID) {
260         ABORT("Unexpected state");
261     } /* else this is really a full collection, and mark        */
262       /* bits are invalid.                                      */
263     scan_ptr = 0;
264 }
265 
266 #ifdef PARALLEL_MARK
267     STATIC void GC_do_parallel_mark(void); /* initiate parallel marking. */
268 #endif /* PARALLEL_MARK */
269 
270 #ifdef GC_DISABLE_INCREMENTAL
271 # define GC_push_next_marked_dirty(h) GC_push_next_marked(h)
272 #else
273   STATIC struct hblk * GC_push_next_marked_dirty(struct hblk *h);
274                 /* Invoke GC_push_marked on next dirty block above h.   */
275                 /* Return a pointer just past the end of this block.    */
276 #endif /* !GC_DISABLE_INCREMENTAL */
277 STATIC struct hblk * GC_push_next_marked(struct hblk *h);
278                 /* Ditto, but also mark from clean pages.       */
279 STATIC struct hblk * GC_push_next_marked_uncollectable(struct hblk *h);
280                 /* Ditto, but mark only from uncollectible pages.       */
281 
282 static void alloc_mark_stack(size_t);
283 
284 # if (((defined(MSWIN32) || defined(MSWINCE)) && !defined(__GNUC__)) \
285         || (defined(MSWIN32) && defined(I386)) /* for Win98 */ \
286         || (defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS))) \
287      && !defined(NO_WRAP_MARK_SOME)
288     /* Under rare conditions, we may end up marking from nonexistent memory. */
289     /* Hence we need to be prepared to recover by running GC_mark_some       */
290     /* with a suitable handler in place.                                     */
291     /* FIXME: Do we really need this for WinCE?  If so, then            */
292     /* WRAP_MARK_SOME should also be defined for CeGCC, which requires  */
293     /* CPU/OS-specific code in mark_ex_handler() and GC_mark_some()     */
294     /* (for manual stack unwinding and exception handler installation). */
295 #   define WRAP_MARK_SOME
296 # endif
297 
298 /* Perform a small amount of marking.                   */
299 /* We try to touch roughly a page of memory.            */
300 /* Return TRUE if we just finished a mark phase.        */
301 /* Cold_gc_frame is an address inside a GC frame that   */
302 /* remains valid until all marking is complete.         */
303 /* A zero value indicates that it's OK to miss some     */
304 /* register values.                                     */
305 /* We hold the allocation lock.  In the case of         */
306 /* incremental collection, the world may not be stopped.*/
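/* Roughly, the states below behave as follows: MS_PUSH_RESCUERS and    */
/* MS_PUSH_UNCOLLECTABLE push marked objects from dirty (respectively   */
/* uncollectible) blocks and then the roots; MS_ROOTS_PUSHED drains the */
/* mark stack (possibly in parallel); MS_INVALID/MS_PARTIALLY_INVALID   */
/* rescan the heap for marked objects to rebuild the invariant after a  */
/* mark stack overflow or an abandoned collection.                      */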
307 #ifdef WRAP_MARK_SOME
308   /* For win32, this is called after we establish a structured  */
309   /* exception handler, in case Windows unmaps one of our root  */
310   /* segments.  See below.  In either case, we acquire the      */
311   /* allocator lock long before we get here.                    */
312   STATIC GC_bool GC_mark_some_inner(ptr_t cold_gc_frame)
313 #else
314   GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame)
315 #endif
316 {
317     switch(GC_mark_state) {
318         case MS_NONE:
319             break;
320 
321         case MS_PUSH_RESCUERS:
322             if ((word)GC_mark_stack_top
323                 >= (word)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/2)) {
324                 /* Go ahead and mark, even though that might cause us to */
325                 /* see more marked dirty objects later on.  Avoid this   */
326                 /* in the future.                                        */
327                 GC_mark_stack_too_small = TRUE;
328                 MARK_FROM_MARK_STACK();
329                 break;
330             } else {
331                 scan_ptr = GC_push_next_marked_dirty(scan_ptr);
332                 if (scan_ptr == 0) {
333                     GC_COND_LOG_PRINTF("Marked from %lu dirty pages\n",
334                                        (unsigned long)GC_n_rescuing_pages);
335                     GC_push_roots(FALSE, cold_gc_frame);
336                     GC_objects_are_marked = TRUE;
337                     if (GC_mark_state != MS_INVALID) {
338                         GC_mark_state = MS_ROOTS_PUSHED;
339                     }
340                 }
341             }
342             break;
343 
344         case MS_PUSH_UNCOLLECTABLE:
345             if ((word)GC_mark_stack_top
346                 >= (word)(GC_mark_stack + GC_mark_stack_size/4)) {
347 #               ifdef PARALLEL_MARK
348                   /* Avoid this, since we don't parallelize the marker  */
349                   /* here.                                              */
350                   if (GC_parallel) GC_mark_stack_too_small = TRUE;
351 #               endif
352                 MARK_FROM_MARK_STACK();
353                 break;
354             } else {
355                 scan_ptr = GC_push_next_marked_uncollectable(scan_ptr);
356                 if (scan_ptr == 0) {
357                     GC_push_roots(TRUE, cold_gc_frame);
358                     GC_objects_are_marked = TRUE;
359                     if (GC_mark_state != MS_INVALID) {
360                         GC_mark_state = MS_ROOTS_PUSHED;
361                     }
362                 }
363             }
364             break;
365 
366         case MS_ROOTS_PUSHED:
367 #           ifdef PARALLEL_MARK
368               /* In the incremental GC case, this currently doesn't     */
369               /* quite do the right thing, since it runs to             */
370               /* completion.  On the other hand, starting a             */
371               /* parallel marker is expensive, so perhaps it is         */
372               /* the right thing?                                       */
373               /* Eventually, incremental marking should run             */
374               /* asynchronously in multiple threads, without grabbing   */
375               /* the allocation lock.                                   */
376                 if (GC_parallel) {
377                   GC_do_parallel_mark();
378                   GC_ASSERT((word)GC_mark_stack_top < (word)GC_first_nonempty);
379                   GC_mark_stack_top = GC_mark_stack - 1;
380                   if (GC_mark_stack_too_small) {
381                     alloc_mark_stack(2*GC_mark_stack_size);
382                   }
383                   if (GC_mark_state == MS_ROOTS_PUSHED) {
384                     GC_mark_state = MS_NONE;
385                     return(TRUE);
386                   }
387                   break;
388                 }
389 #           endif
390             if ((word)GC_mark_stack_top >= (word)GC_mark_stack) {
391                 MARK_FROM_MARK_STACK();
392                 break;
393             } else {
394                 GC_mark_state = MS_NONE;
395                 if (GC_mark_stack_too_small) {
396                     alloc_mark_stack(2*GC_mark_stack_size);
397                 }
398                 return(TRUE);
399             }
400 
401         case MS_INVALID:
402         case MS_PARTIALLY_INVALID:
403             if (!GC_objects_are_marked) {
404                 GC_mark_state = MS_PUSH_UNCOLLECTABLE;
405                 break;
406             }
407             if ((word)GC_mark_stack_top >= (word)GC_mark_stack) {
408                 MARK_FROM_MARK_STACK();
409                 break;
410             }
411             if (scan_ptr == 0 && GC_mark_state == MS_INVALID) {
412                 /* About to start a heap scan for marked objects. */
413                 /* Mark stack is empty.  OK to reallocate.        */
414                 if (GC_mark_stack_too_small) {
415                     alloc_mark_stack(2*GC_mark_stack_size);
416                 }
417                 GC_mark_state = MS_PARTIALLY_INVALID;
418             }
419             scan_ptr = GC_push_next_marked(scan_ptr);
420             if (scan_ptr == 0 && GC_mark_state == MS_PARTIALLY_INVALID) {
421                 GC_push_roots(TRUE, cold_gc_frame);
422                 GC_objects_are_marked = TRUE;
423                 if (GC_mark_state != MS_INVALID) {
424                     GC_mark_state = MS_ROOTS_PUSHED;
425                 }
426             }
427             break;
428 
429         default:
430             ABORT("GC_mark_some: bad state");
431     }
432     return(FALSE);
433 }
434 
435 #ifdef WRAP_MARK_SOME
436 
437 # if (defined(MSWIN32) || defined(MSWINCE)) && defined(__GNUC__)
438 
439     typedef struct {
440       EXCEPTION_REGISTRATION ex_reg;
441       void *alt_path;
442     } ext_ex_regn;
443 
444     static EXCEPTION_DISPOSITION mark_ex_handler(
445         struct _EXCEPTION_RECORD *ex_rec,
446         void *est_frame,
447         struct _CONTEXT *context,
448         void *disp_ctxt GC_ATTR_UNUSED)
449     {
450         if (ex_rec->ExceptionCode == STATUS_ACCESS_VIOLATION) {
451           ext_ex_regn *xer = (ext_ex_regn *)est_frame;
452 
453           /* Unwind from the inner function assuming the standard */
454           /* function prologue.                                   */
455           /* Assumes code has not been compiled with              */
456           /* -fomit-frame-pointer.                                */
457           context->Esp = context->Ebp;
458           context->Ebp = *((DWORD *)context->Esp);
459           context->Esp = context->Esp - 8;
460 
461           /* Resume execution at the "real" handler within the    */
462           /* wrapper function.                                    */
463           context->Eip = (DWORD )(xer->alt_path);
464 
465           return ExceptionContinueExecution;
466 
467         } else {
468             return ExceptionContinueSearch;
469         }
470     }
471 # endif /* __GNUC__  && MSWIN32 */
472 
473 #if defined(GC_WIN32_THREADS) && !defined(__GNUC__)
474   GC_INNER GC_bool GC_started_thread_while_stopped(void);
475   /* In win32_threads.c.  Did we invalidate mark phase with an  */
476   /* unexpected thread start?                                   */
477 #endif
478 
479   GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame)
480   {
481       GC_bool ret_val;
482 
483 #   if defined(MSWIN32) || defined(MSWINCE)
484 #    ifndef __GNUC__
485       /* Windows 98 appears to asynchronously create and remove  */
486       /* writable memory mappings, for reasons we haven't yet    */
487       /* understood.  Since we look for writable regions to      */
488       /* determine the root set, we may try to mark from an      */
489       /* address range that disappeared since we started the     */
490       /* collection.  Thus we have to recover from faults here.  */
491       /* This code does not appear to be necessary for Windows   */
492       /* 95/NT/2000+. Note that this code should never generate  */
493       /* an incremental GC write fault.                          */
494       /* This code seems to be necessary for WinCE (at least in  */
495       /* the case we'd decide to add MEM_PRIVATE sections to     */
496       /* data roots in GC_register_dynamic_libraries()).         */
497       /* It's conceivable that this is the same issue with       */
498       /* terminating threads that we see with Linux and          */
499       /* USE_PROC_FOR_LIBRARIES.                                 */
500 
501       __try {
502           ret_val = GC_mark_some_inner(cold_gc_frame);
503       } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
504                 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
505           goto handle_ex;
506       }
507 #     ifdef GC_WIN32_THREADS
508         /* With DllMain-based thread tracking, a thread may have        */
509         /* started while we were marking.  This is logically equivalent */
510         /* to the exception case; our results are invalid and we have   */
511         /* to start over.  This cannot be prevented since we can't      */
512         /* block in DllMain.                                            */
513         if (GC_started_thread_while_stopped()) goto handle_ex;
514 #     endif
515      rm_handler:
516       return ret_val;
517 
518 #    else /* __GNUC__ */
519 
520       /* Manually install an exception handler since GCC does    */
521       /* not yet support Structured Exception Handling (SEH) on  */
522       /* Win32.                                                  */
523 
524       ext_ex_regn er;
525 
526       er.alt_path = &&handle_ex;
527       er.ex_reg.handler = mark_ex_handler;
528       __asm__ __volatile__ ("movl %%fs:0, %0" : "=r" (er.ex_reg.prev));
529       __asm__ __volatile__ ("movl %0, %%fs:0" : : "r" (&er));
530       ret_val = GC_mark_some_inner(cold_gc_frame);
531       /* Prevent GCC from considering the following code unreachable */
532       /* and thus eliminating it.                                    */
533         if (er.alt_path == 0)
534           goto handle_ex;
535     rm_handler:
536       /* Uninstall the exception handler */
537       __asm__ __volatile__ ("mov %0, %%fs:0" : : "r" (er.ex_reg.prev));
538       return ret_val;
539 
540 #    endif /* __GNUC__ */
541 #   else /* !MSWIN32 */
542       /* Here we are handling the case in which /proc is used for root  */
543       /* finding, and we have threads.  We may find a stack for a       */
544       /* thread that is in the process of exiting, and disappears       */
545       /* while we are marking it.  This seems extremely difficult to    */
546       /* avoid otherwise.                                               */
547       if (GC_incremental) {
548         WARN("Incremental GC incompatible with /proc roots\n", 0);
549         /* I'm not sure if this could still work ...    */
550       }
551       GC_setup_temporary_fault_handler();
552       if(SETJMP(GC_jmp_buf) != 0) goto handle_ex;
553       ret_val = GC_mark_some_inner(cold_gc_frame);
554     rm_handler:
555       GC_reset_fault_handler();
556       return ret_val;
557 
558 #   endif /* !MSWIN32 */
559 
560 handle_ex:
561     /* Exception handler starts here for all cases. */
562       WARN("Caught ACCESS_VIOLATION in marker;"
563            " memory mapping disappeared\n", 0);
564 
565       /* We have bad roots on the stack.  Discard mark stack.   */
566       /* Rescan from marked objects.  Redetermine roots.        */
567       GC_invalidate_mark_state();
568       scan_ptr = 0;
569 
570       ret_val = FALSE;
571       goto rm_handler;  /* Back to platform-specific code. */
572   }
573 #endif /* WRAP_MARK_SOME */
574 
575 GC_INNER void GC_invalidate_mark_state(void)
576 {
577     GC_mark_state = MS_INVALID;
578     GC_mark_stack_top = GC_mark_stack-1;
579 }
580 
581 GC_INNER mse * GC_signal_mark_stack_overflow(mse *msp)
582 {
583     GC_mark_state = MS_INVALID;
584 #   ifdef PARALLEL_MARK
585       /* We are using a local_mark_stack in parallel mode, so   */
586       /* do not signal the global mark stack to be resized.     */
587       /* That will be done if required in GC_return_mark_stack. */
588       if (!GC_parallel)
589         GC_mark_stack_too_small = TRUE;
590 #   else
591       GC_mark_stack_too_small = TRUE;
592 #   endif
593     GC_COND_LOG_PRINTF("Mark stack overflow; current size = %lu entries\n",
594                        (unsigned long)GC_mark_stack_size);
595     return(msp - GC_MARK_STACK_DISCARDS);
596 }
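/* Note: the GC_MARK_STACK_DISCARDS entries dropped here are not lost   */
/* for good; setting MS_INVALID above forces a later rescan of the heap */
/* for marked objects, which recovers anything that was discarded.      */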
597 
598 /*
599  * Mark objects pointed to by the regions described by
600  * mark stack entries between mark_stack and mark_stack_top,
601  * inclusive.  Assumes the upper limit of a mark stack entry
602  * is never 0.  A mark stack entry never has size 0.
603  * We try to traverse on the order of a hblk of memory before we return.
604  * Caller is responsible for calling this until the mark stack is empty.
605  * Note that this is the most performance critical routine in the
606  * collector.  Hence it contains all sorts of ugly hacks to speed
607  * things up.  In particular, we avoid procedure calls on the common
608  * path, we take advantage of peculiarities of the mark descriptor
609  * encoding, we optionally maintain a cache for the block address to
610  * header mapping, we prefetch when an object is "grayed", etc.
611  */
612 GC_INNER mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack,
613                             mse *mark_stack_limit)
614 {
615   signed_word credit = HBLKSIZE;  /* Remaining credit for marking work  */
616   ptr_t current_p;      /* Pointer to current candidate ptr.            */
617   word current;         /* Candidate pointer.                           */
618   ptr_t limit = 0;      /* (Incl) limit of current candidate range.     */
619   word descr;
620   ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
621   ptr_t least_ha = GC_least_plausible_heap_addr;
622   DECLARE_HDR_CACHE;
623 
624 # define SPLIT_RANGE_WORDS 128  /* Must be power of 2.          */
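  /* A large simple range is handled by scanning roughly the first      */
  /* SPLIT_RANGE_WORDS words now and pushing the remainder back as a    */
  /* single mark stack entry (see the GC_DS_LENGTH case below).         */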
625 
626   GC_objects_are_marked = TRUE;
627   INIT_HDR_CACHE;
628 # ifdef OS2 /* Use untweaked version to circumvent compiler problem */
629     while ((word)mark_stack_top >= (word)mark_stack && credit >= 0)
630 # else
631     while ((((ptr_t)mark_stack_top - (ptr_t)mark_stack) | credit) >= 0)
632 # endif
633   {
634     current_p = mark_stack_top -> mse_start;
635     descr = mark_stack_top -> mse_descr.w;
636   retry:
637     /* current_p and descr describe the current object.         */
638     /* *mark_stack_top is vacant.                               */
639     /* The following is 0 only for small objects described by a simple  */
640     /* length descriptor.  For many applications this is the common     */
641     /* case, so we try to detect it quickly.                            */
642     if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | GC_DS_TAGS)) {
643       word tag = descr & GC_DS_TAGS;
644 
645       GC_STATIC_ASSERT(GC_DS_TAGS == 0x3);
646       switch(tag) {
647         case GC_DS_LENGTH:
648           /* Large length.                                              */
649           /* Process part of the range to avoid pushing too much on the */
650           /* stack.                                                     */
651           GC_ASSERT(descr < (word)GC_greatest_plausible_heap_addr
652                             - (word)GC_least_plausible_heap_addr);
653 #         ifdef ENABLE_TRACE
654             if ((word)GC_trace_addr >= (word)current_p
655                 && (word)GC_trace_addr < (word)(current_p + descr)) {
656               GC_log_printf("GC #%u: large section; start %p, len %lu\n",
657                         (unsigned)GC_gc_no, current_p, (unsigned long)descr);
658             }
659 #         endif /* ENABLE_TRACE */
660 #         ifdef PARALLEL_MARK
661 #           define SHARE_BYTES 2048
662             if (descr > SHARE_BYTES && GC_parallel
663                 && (word)mark_stack_top < (word)(mark_stack_limit - 1)) {
664               int new_size = (descr/2) & ~(sizeof(word)-1);
665               mark_stack_top -> mse_start = current_p;
666               mark_stack_top -> mse_descr.w = new_size + sizeof(word);
667                                         /* makes sure we handle         */
668                                         /* misaligned pointers.         */
669               mark_stack_top++;
670 #             ifdef ENABLE_TRACE
671                 if ((word)GC_trace_addr >= (word)current_p
672                     && (word)GC_trace_addr < (word)(current_p + descr)) {
673                   GC_log_printf("GC #%u: splitting (parallel) %p at %p\n",
674                         (unsigned)GC_gc_no, current_p, current_p + new_size);
675                 }
676 #             endif /* ENABLE_TRACE */
677               current_p += new_size;
678               descr -= new_size;
679               goto retry;
680             }
681 #         endif /* PARALLEL_MARK */
682           mark_stack_top -> mse_start =
683                 limit = current_p + WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
684           mark_stack_top -> mse_descr.w =
685                                 descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
686 #         ifdef ENABLE_TRACE
687             if ((word)GC_trace_addr >= (word)current_p
688                 && (word)GC_trace_addr < (word)(current_p + descr)) {
689               GC_log_printf("GC #%u: splitting %p at %p\n",
690                             (unsigned)GC_gc_no, current_p, limit);
691             }
692 #         endif /* ENABLE_TRACE */
693           /* Make sure that pointers overlapping the two ranges are     */
694           /* considered.                                                */
695           limit += sizeof(word) - ALIGNMENT;
696           break;
697         case GC_DS_BITMAP:
698           mark_stack_top--;
699 #         ifdef ENABLE_TRACE
700             if ((word)GC_trace_addr >= (word)current_p
701                 && (word)GC_trace_addr < (word)(current_p
702                                                 + WORDS_TO_BYTES(WORDSZ-2))) {
703               GC_log_printf("GC #%u: tracing from %p bitmap descr %lu\n",
704                             (unsigned)GC_gc_no, current_p,
705                             (unsigned long)descr);
706             }
707 #         endif /* ENABLE_TRACE */
708           descr &= ~GC_DS_TAGS;
709           credit -= WORDS_TO_BYTES(WORDSZ/2); /* guess */
710           while (descr != 0) {
711             if ((signed_word)descr < 0) {
712               current = *(word *)current_p;
713               FIXUP_POINTER(current);
714               if (current >= (word)least_ha && current < (word)greatest_ha) {
715                 PREFETCH((ptr_t)current);
716 #               ifdef ENABLE_TRACE
717                   if (GC_trace_addr == current_p) {
718                     GC_log_printf("GC #%u: considering(3) %p -> %p\n",
719                                   (unsigned)GC_gc_no, current_p,
720                                   (ptr_t)current);
721                   }
722 #               endif /* ENABLE_TRACE */
723                 PUSH_CONTENTS((ptr_t)current, mark_stack_top,
724                               mark_stack_limit, current_p, exit1);
725               }
726             }
727             descr <<= 1;
728             current_p += sizeof(word);
729           }
730           continue;
731         case GC_DS_PROC:
732           mark_stack_top--;
733 #         ifdef ENABLE_TRACE
734             if ((word)GC_trace_addr >= (word)current_p
735                 && GC_base(current_p) != 0
736                 && GC_base(current_p) == GC_base(GC_trace_addr)) {
737               GC_log_printf("GC #%u: tracing from %p, proc descr %lu\n",
738                             (unsigned)GC_gc_no, current_p,
739                             (unsigned long)descr);
740             }
741 #         endif /* ENABLE_TRACE */
742           credit -= GC_PROC_BYTES;
743           mark_stack_top = (*PROC(descr))((word *)current_p, mark_stack_top,
744                                           mark_stack_limit, ENV(descr));
745           continue;
746         case GC_DS_PER_OBJECT:
747           if ((signed_word)descr >= 0) {
748             /* Descriptor is in the object.     */
749             descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
750           } else {
751             /* Descriptor is in type descriptor pointed to by first     */
752             /* word in object.                                          */
753             ptr_t type_descr = *(ptr_t *)current_p;
754             /* type_descr is either a valid pointer to the descriptor   */
755             /* structure, or this object was on a free list.  If it     */
756             /* was anything but the last object on the free list,     */
757             /* we will misinterpret the next object on the free list as */
758             /* the type descriptor, and get a 0 GC descriptor, which    */
759             /* is ideal.  Unfortunately, we need to check for the last  */
760             /* object case explicitly.                                  */
761             if (0 == type_descr) {
762                 /* Rarely executed.     */
763                 mark_stack_top--;
764                 continue;
765             }
766             if ((GC_word)(type_descr) >= (GC_word)GC_least_plausible_heap_addr
767                     && (GC_word)(type_descr)
768                         <= (GC_word)GC_greatest_plausible_heap_addr) {
769                 /* type_descr looks like a pointer into the heap.       */
770                 /* It could still be the link pointer in a free list    */
771                 /* though.  That's not a problem as long as the offset  */
772                 /* of the actual descriptor in the pointed to object is */
773                 /* within the same object.  In that case it will either */
774                 /* point at the next free object in the list (if offset */
775                 /* is 0) or be zeroed (which we check for below,        */
776                 /* descr == 0).  If the offset is larger than the       */
777                 /* objects in the block type_descr points to it cannot  */
778                 /* be a proper pointer.                                 */
779                 word offset = ~(descr + (GC_INDIR_PER_OBJ_BIAS
780                                          - GC_DS_PER_OBJECT - 1));
781                 hdr *hhdr;
782                 GET_HDR(type_descr, hhdr);
783                 if (NULL == hhdr || hhdr->hb_sz - sizeof(word) < offset) {
784                     mark_stack_top--;
785                     continue;
786                 }
787             }
788             descr = *(word *)(type_descr
789                               - (descr + (GC_INDIR_PER_OBJ_BIAS
790                                           - GC_DS_PER_OBJECT)));
791           }
792           if (0 == descr) {
793               /* Can happen either because we generated a 0 descriptor  */
794               /* or we saw a pointer to a free object.          */
795               mark_stack_top--;
796               continue;
797           }
798           goto retry;
799       }
800     } else /* Small object with length descriptor */ {
801       mark_stack_top--;
802 #     ifndef SMALL_CONFIG
803         if (descr < sizeof(word))
804           continue;
805 #     endif
806       limit = current_p + (word)descr;
807     }
808 #   ifdef ENABLE_TRACE
809         if ((word)GC_trace_addr >= (word)current_p
810             && (word)GC_trace_addr < (word)limit) {
811           GC_log_printf("GC #%u: Tracing from %p, length is %lu\n",
812                         (unsigned)GC_gc_no, current_p, (unsigned long)descr);
813         }
814 #   endif /* ENABLE_TRACE */
815     /* The simple case in which we're scanning a range. */
816     GC_ASSERT(!((word)current_p & (ALIGNMENT-1)));
817     credit -= limit - current_p;
818     limit -= sizeof(word);
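    /* limit now points at the last candidate pointer location in the   */
    /* range; the scanning loops below treat it as an inclusive bound.  */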
819     {
820 #     define PREF_DIST 4
821 
822 #     ifndef SMALL_CONFIG
823         word deferred;
824 
825         /* Try to prefetch the next pointer to be examined ASAP.        */
826         /* Empirically, this also seems to help slightly without        */
827         /* prefetches, at least on linux/X86.  Presumably this loop     */
828         /* ends up with less register pressure, and gcc thus ends up    */
829         /* generating slightly better code.  Overall gcc code quality   */
830         /* for this loop is still not great.                            */
831         for(;;) {
832           PREFETCH(limit - PREF_DIST*CACHE_LINE_SIZE);
833           GC_ASSERT((word)limit >= (word)current_p);
834           deferred = *(word *)limit;
835           FIXUP_POINTER(deferred);
836           limit -= ALIGNMENT;
837           if (deferred >= (word)least_ha && deferred < (word)greatest_ha) {
838             PREFETCH((ptr_t)deferred);
839             break;
840           }
841           if ((word)current_p > (word)limit) goto next_object;
842           /* Unroll once, so we don't do too many of the prefetches     */
843           /* based on limit.                                            */
844           deferred = *(word *)limit;
845           FIXUP_POINTER(deferred);
846           limit -= ALIGNMENT;
847           if (deferred >= (word)least_ha && deferred < (word)greatest_ha) {
848             PREFETCH((ptr_t)deferred);
849             break;
850           }
851           if ((word)current_p > (word)limit) goto next_object;
852         }
853 #     endif
854 
855       while ((word)current_p <= (word)limit) {
856         /* Empirically, unrolling this loop doesn't help a lot. */
857         /* Since PUSH_CONTENTS expands to a lot of code,        */
858         /* we don't.                                            */
859         current = *(word *)current_p;
860         FIXUP_POINTER(current);
861         PREFETCH(current_p + PREF_DIST*CACHE_LINE_SIZE);
862         if (current >= (word)least_ha && current < (word)greatest_ha) {
863           /* Prefetch the contents of the object we just pushed.  It's  */
864           /* likely we will need them soon.                             */
865           PREFETCH((ptr_t)current);
866 #         ifdef ENABLE_TRACE
867             if (GC_trace_addr == current_p) {
868               GC_log_printf("GC #%u: considering(1) %p -> %p\n",
869                             (unsigned)GC_gc_no, current_p, (ptr_t)current);
870             }
871 #         endif /* ENABLE_TRACE */
872           PUSH_CONTENTS((ptr_t)current, mark_stack_top,
873                         mark_stack_limit, current_p, exit2);
874         }
875         current_p += ALIGNMENT;
876       }
877 
878 #     ifndef SMALL_CONFIG
879         /* We still need to mark the entry we previously prefetched.    */
880         /* We already know that it passes the preliminary pointer       */
881         /* validity test.                                               */
882 #       ifdef ENABLE_TRACE
883             if (GC_trace_addr == current_p) {
884               GC_log_printf("GC #%u: considering(2) %p -> %p\n",
885                             (unsigned)GC_gc_no, current_p, (ptr_t)deferred);
886             }
887 #       endif /* ENABLE_TRACE */
888         PUSH_CONTENTS((ptr_t)deferred, mark_stack_top,
889                       mark_stack_limit, current_p, exit4);
890         next_object:;
891 #     endif
892     }
893   }
894   return mark_stack_top;
895 }
896 
897 #ifdef PARALLEL_MARK
898 
899 STATIC GC_bool GC_help_wanted = FALSE;  /* Protected by mark lock       */
900 STATIC unsigned GC_helper_count = 0;    /* Number of running helpers.   */
901                                         /* Protected by mark lock       */
902 STATIC unsigned GC_active_count = 0;    /* Number of active helpers.    */
903                                         /* Protected by mark lock       */
904                                         /* May increase and decrease    */
905                                         /* within each mark cycle.  But */
906                                         /* once it returns to 0, it     */
907                                         /* stays zero for the cycle.    */
908 
909 GC_INNER word GC_mark_no = 0;
910 
911 #define LOCAL_MARK_STACK_SIZE HBLKSIZE
912         /* Under normal circumstances, this is big enough to guarantee  */
913         /* we don't overflow half of it in a single call to             */
914         /* GC_mark_from.                                                */
915 
916 /* Wait for all markers to finish initialization (i.e. store    */
917 /* marker_[b]sp, marker_mach_threads, GC_marker_Id).            */
918 GC_INNER void GC_wait_for_markers_init(void)
919 {
920   word count;
921 
922   if (GC_markers_m1 == 0)
923     return;
924 
925   /* Reuse marker lock and builders count to synchronize        */
926   /* marker threads startup.                                    */
927   GC_acquire_mark_lock();
928   GC_fl_builder_count += (word)GC_markers_m1;
929   count = GC_fl_builder_count;
930   GC_release_mark_lock();
931   if (count != 0)
932     GC_wait_for_reclaim();
933 }
934 
935 /* Steal mark stack entries starting at mse low into mark stack local   */
936 /* until we either steal mse high, or we have max entries.              */
937 /* Return a pointer to the top of the local mark stack.                 */
938 /* *next is replaced by a pointer to the next unscanned mark stack      */
939 /* entry.                                                               */
940 STATIC mse * GC_steal_mark_stack(mse * low, mse * high, mse * local,
941                                  unsigned max, mse **next)
942 {
943     mse *p;
944     mse *top = local - 1;
945     unsigned i = 0;
946 
947     GC_ASSERT((word)high >= (word)(low - 1)
948               && (word)(high - low + 1) <= GC_mark_stack_size);
949     for (p = low; (word)p <= (word)high && i <= max; ++p) {
950         word descr = (word)AO_load(&p->mse_descr.ao);
951         if (descr != 0) {
952             /* Must be ordered after read of descr: */
953             AO_store_release_write(&p->mse_descr.ao, 0);
954             /* More than one thread may get this entry, but that's only */
955             /* a minor performance problem.                             */
956             ++top;
957             top -> mse_descr.w = descr;
958             top -> mse_start = p -> mse_start;
959             GC_ASSERT((top->mse_descr.w & GC_DS_TAGS) != GC_DS_LENGTH ||
960                       top->mse_descr.w < (word)GC_greatest_plausible_heap_addr
961                                          - (word)GC_least_plausible_heap_addr);
962             /* If this is a big object, count it as                     */
963             /* size/256 + 1 objects.                                    */
964             ++i;
965             if ((descr & GC_DS_TAGS) == GC_DS_LENGTH) i += (int)(descr >> 8);
966         }
967     }
968     *next = p;
969     return top;
970 }
971 
972 /* Copy back a local mark stack.        */
973 /* low and high are inclusive bounds.   */
974 STATIC void GC_return_mark_stack(mse * low, mse * high)
975 {
976     mse * my_top;
977     mse * my_start;
978     size_t stack_size;
979 
980     if ((word)high < (word)low) return;
981     stack_size = high - low + 1;
982     GC_acquire_mark_lock();
983     my_top = GC_mark_stack_top; /* Concurrent modification impossible. */
984     my_start = my_top + 1;
985     if ((word)(my_start - GC_mark_stack + stack_size)
986                 > (word)GC_mark_stack_size) {
987       GC_COND_LOG_PRINTF("No room to copy back mark stack\n");
988       GC_mark_state = MS_INVALID;
989       GC_mark_stack_too_small = TRUE;
990       /* We drop the local mark stack.  We'll fix things later. */
991     } else {
992       BCOPY(low, my_start, stack_size * sizeof(mse));
993       GC_ASSERT((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
994                 == my_top);
995       AO_store_release_write((volatile AO_t *)(&GC_mark_stack_top),
996                              (AO_t)(my_top + stack_size));
997                 /* Ensures visibility of previously written stack contents. */
998     }
999     GC_release_mark_lock();
1000     GC_notify_all_marker();
1001 }
1002 
1003 /* Mark from the local mark stack.              */
1004 /* On return, the local mark stack is empty.    */
1005 /* But this may be achieved by copying the      */
1006 /* local mark stack back into the global one.   */
1007 STATIC void GC_do_local_mark(mse *local_mark_stack, mse *local_top)
1008 {
1009     unsigned n;
1010 #   define N_LOCAL_ITERS 1
1011 
1012 #   ifdef GC_ASSERTIONS
1013       /* Make sure we don't hold mark lock. */
1014         GC_acquire_mark_lock();
1015         GC_release_mark_lock();
1016 #   endif
1017     for (;;) {
1018         for (n = 0; n < N_LOCAL_ITERS; ++n) {
1019             local_top = GC_mark_from(local_top, local_mark_stack,
1020                                      local_mark_stack + LOCAL_MARK_STACK_SIZE);
1021             if ((word)local_top < (word)local_mark_stack) return;
1022             if ((word)(local_top - local_mark_stack)
1023                         >= LOCAL_MARK_STACK_SIZE / 2) {
1024                 GC_return_mark_stack(local_mark_stack, local_top);
1025                 return;
1026             }
1027         }
1028         if ((word)AO_load((volatile AO_t *)&GC_mark_stack_top)
1029             < (word)AO_load(&GC_first_nonempty)
1030             && GC_active_count < GC_helper_count
1031             && (word)local_top > (word)(local_mark_stack + 1)) {
1032             /* Try to share the load, since the main stack is empty,    */
1033             /* and helper threads are waiting for a refill.             */
1034             /* The entries near the bottom of the stack are likely      */
1035             /* to require more work.  Thus we return those, even though */
1036             /* it's harder.                                             */
1037             mse * new_bottom = local_mark_stack
1038                                 + (local_top - local_mark_stack)/2;
1039             GC_ASSERT((word)new_bottom > (word)local_mark_stack
1040                       && (word)new_bottom < (word)local_top);
1041             GC_return_mark_stack(local_mark_stack, new_bottom - 1);
1042             memmove(local_mark_stack, new_bottom,
1043                     (local_top - new_bottom + 1) * sizeof(mse));
1044             local_top -= (new_bottom - local_mark_stack);
1045         }
1046     }
1047 }
1048 
1049 #define ENTRIES_TO_GET 5
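/* A helper normally steals ENTRIES_TO_GET entries at a time; when the  */
/* global stack is nearly empty (fewer than twice that many entries),   */
/* GC_mark_local falls back to stealing a single entry so the remaining */
/* work can be spread across the waiting helpers.                       */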
1050 
1051 /* Mark using the local mark stack until the global mark stack is empty */
1052 /* and there are no active workers. Update GC_first_nonempty to reflect */
1053 /* progress.                                                            */
1054 /* Caller does not hold mark lock.                                      */
1055 /* Caller has already incremented GC_helper_count.  We decrement it,    */
1056 /* and maintain GC_active_count.                                        */
1057 STATIC void GC_mark_local(mse *local_mark_stack, int id)
1058 {
1059     mse * my_first_nonempty;
1060 
1061     GC_acquire_mark_lock();
1062     GC_active_count++;
1063     my_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
1064     GC_ASSERT((word)GC_mark_stack <= (word)my_first_nonempty);
1065     GC_ASSERT((word)my_first_nonempty
1066         <= (word)AO_load((volatile AO_t *)&GC_mark_stack_top) + sizeof(mse));
1067     GC_VERBOSE_LOG_PRINTF("Starting mark helper %lu\n", (unsigned long)id);
1068     GC_release_mark_lock();
1069     for (;;) {
1070         size_t n_on_stack;
1071         unsigned n_to_get;
1072         mse * my_top;
1073         mse * local_top;
1074         mse * global_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
1075 
1076         GC_ASSERT((word)my_first_nonempty >= (word)GC_mark_stack &&
1077                   (word)my_first_nonempty <=
1078                         (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
1079                         + sizeof(mse));
1080         GC_ASSERT((word)global_first_nonempty >= (word)GC_mark_stack &&
1081                   (word)global_first_nonempty <=
1082                         (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
1083                         + sizeof(mse));
1084         if ((word)my_first_nonempty < (word)global_first_nonempty) {
1085             my_first_nonempty = global_first_nonempty;
1086         } else if ((word)global_first_nonempty < (word)my_first_nonempty) {
1087             AO_compare_and_swap(&GC_first_nonempty,
1088                                 (AO_t) global_first_nonempty,
1089                                 (AO_t) my_first_nonempty);
1090             /* If this fails, we just go ahead, without updating        */
1091             /* GC_first_nonempty.                                       */
1092         }
1093         /* Perhaps we should also update GC_first_nonempty, if it */
1094         /* is less.  But that would require using atomic updates. */
1095         my_top = (mse *)AO_load_acquire((volatile AO_t *)(&GC_mark_stack_top));
1096         n_on_stack = my_top - my_first_nonempty + 1;
1097         if (0 == n_on_stack) {
1098             GC_acquire_mark_lock();
1099             my_top = GC_mark_stack_top;
1100                 /* Asynchronous modification impossible here,   */
1101                 /* since we hold mark lock.                     */
1102             n_on_stack = my_top - my_first_nonempty + 1;
1103             if (0 == n_on_stack) {
1104                 GC_active_count--;
1105                 GC_ASSERT(GC_active_count <= GC_helper_count);
1106                 /* Other markers may redeposit objects  */
1107                 /* on the stack.                                */
1108                 if (0 == GC_active_count) GC_notify_all_marker();
1109                 while (GC_active_count > 0
1110                        && (word)AO_load(&GC_first_nonempty)
1111                                 > (word)GC_mark_stack_top) {
1112                     /* We will be notified if either GC_active_count    */
1113                     /* reaches zero, or if more objects are pushed on   */
1114                     /* the global mark stack.                           */
1115                     GC_wait_marker();
1116                 }
1117                 if (GC_active_count == 0
1118                     && (word)AO_load(&GC_first_nonempty)
1119                         > (word)GC_mark_stack_top) {
1120                     GC_bool need_to_notify = FALSE;
1121                     /* The above conditions can't be falsified while we */
1122                     /* hold the mark lock, since neither                */
1123                     /* GC_active_count nor GC_mark_stack_top can        */
1124                     /* change.  GC_first_nonempty can only be           */
1125                     /* incremented asynchronously.  Thus we know that   */
1126                     /* both conditions actually held simultaneously.    */
1127                     GC_helper_count--;
1128                     if (0 == GC_helper_count) need_to_notify = TRUE;
1129                     GC_VERBOSE_LOG_PRINTF("Finished mark helper %lu\n",
1130                                           (unsigned long)id);
1131                     GC_release_mark_lock();
1132                     if (need_to_notify) GC_notify_all_marker();
1133                     return;
1134                 }
1135                 /* else there's something on the stack again, or        */
1136                 /* another helper may push something.                   */
1137                 GC_active_count++;
1138                 GC_ASSERT(GC_active_count > 0);
1139                 GC_release_mark_lock();
1140                 continue;
1141             } else {
1142                 GC_release_mark_lock();
1143             }
1144         }
1145         n_to_get = ENTRIES_TO_GET;
1146         if (n_on_stack < 2 * ENTRIES_TO_GET) n_to_get = 1;
1147         local_top = GC_steal_mark_stack(my_first_nonempty, my_top,
1148                                         local_mark_stack, n_to_get,
1149                                         &my_first_nonempty);
1150         GC_ASSERT((word)my_first_nonempty >= (word)GC_mark_stack &&
1151                   (word)my_first_nonempty <=
1152                         (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
1153                         + sizeof(mse));
1154         GC_do_local_mark(local_mark_stack, local_top);
1155     }
1156 }
1157 
1158 GC_INNER mse GC_local_mark_stacks[MAX_MARKERS+1][LOCAL_MARK_STACK_SIZE];
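/* One local stack for each potential helper thread, plus one for the   */
/* initiating thread (which uses index 0 in GC_do_parallel_mark).       */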
1159 
1160 /* Perform Parallel mark.                       */
1161 /* We hold the GC lock, not the mark lock.      */
1162 /* Currently runs until the mark stack is       */
1163 /* empty.                                       */
1164 STATIC void GC_do_parallel_mark(void)
1165 {
1166     mse *local_mark_stack = &(GC_local_mark_stacks[0][0]);
1167                 /* Note: local_mark_stack is quite big (up to 128 KiB). */
1168 
1169     GC_acquire_mark_lock();
1170     GC_ASSERT(I_HOLD_LOCK());
1171     /* This could be a GC_ASSERT, but it seems safer to keep it on      */
1172     /* all the time, especially since it's cheap.                       */
1173     if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
1174         ABORT("Tried to start parallel mark in bad state");
1175     GC_VERBOSE_LOG_PRINTF("Starting marking for mark phase number %lu\n",
1176                           (unsigned long)GC_mark_no);
1177     GC_first_nonempty = (AO_t)GC_mark_stack;
1178     GC_active_count = 0;
1179     GC_helper_count = 1;
1180     GC_help_wanted = TRUE;
1181     GC_release_mark_lock();
1182     GC_notify_all_marker();
1183         /* Wake up potential helpers.   */
1184     GC_mark_local(local_mark_stack, 0);
1185     GC_acquire_mark_lock();
1186     GC_help_wanted = FALSE;
1187     /* Done; clean up.  */
1188     while (GC_helper_count > 0) {
1189       GC_wait_marker();
1190     }
1191     /* GC_helper_count cannot be incremented while GC_help_wanted == FALSE */
1192     GC_VERBOSE_LOG_PRINTF("Finished marking for mark phase number %lu\n",
1193                           (unsigned long)GC_mark_no);
1194     GC_mark_no++;
1195     GC_release_mark_lock();
1196     GC_notify_all_marker();
1197 }
1198 
1199 /* Try to help out the marker, if it's running.         */
1200 /* We do not hold the GC lock, but the requestor does.  */
1201 GC_INNER void GC_help_marker(word my_mark_no)
1202 {
1203     unsigned my_id;
1204     mse *local_mark_stack;
1205                 /* Note: local_mark_stack is quite big (up to 128 KiB). */
1206 
1207     if (!GC_parallel) return;
1208 
1209     GC_acquire_mark_lock();
1210     while (GC_mark_no < my_mark_no
1211            || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
1212       GC_wait_marker();
1213     }
1214     my_id = GC_helper_count;
1215     local_mark_stack = &(GC_local_mark_stacks[my_id][0]);
1216     if (GC_mark_no != my_mark_no || my_id > (unsigned)GC_markers_m1) {
1217       /* Second test is useful only if original threads can also        */
1218       /* act as helpers.  Under Linux they can't.                       */
1219       GC_release_mark_lock();
1220       return;
1221     }
1222     GC_helper_count = my_id + 1;
1223     GC_release_mark_lock();
1224     GC_mark_local(local_mark_stack, my_id);
1225     /* GC_mark_local decrements GC_helper_count. */
1226 }
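
/*
 * Illustrative sketch (hedged; the real loop lives in the threads
 * support code, e.g. pthread_support.c): a dedicated marker thread
 * essentially spends its life in a loop of roughly this shape, with
 * my_mark_no advancing once per mark phase:
 *
 *   word my_mark_no = 0;
 *   for (;; ++my_mark_no) {
 *     if (my_mark_no < GC_mark_no || my_mark_no > GC_mark_no + 2)
 *       my_mark_no = GC_mark_no;    [resynchronize with the collector]
 *     GC_help_marker(my_mark_no);
 *   }
 *
 * GC_help_marker itself only has to cope with callers whose mark
 * number is stale or whose helper id exceeds GC_markers_m1.
 */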
1227 
1228 #endif /* PARALLEL_MARK */
1229 
1230 /* Allocate or reallocate space for mark stack of size n entries.  */
1231 /* May silently fail.                                              */
1232 static void alloc_mark_stack(size_t n)
1233 {
1234     mse * new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
1235 #   ifdef GWW_VDB
1236       /* Don't recycle a stack segment obtained with the wrong flags.   */
1237       /* Win32 GetWriteWatch requires the right kind of memory.         */
1238       static GC_bool GC_incremental_at_stack_alloc = FALSE;
1239       GC_bool recycle_old = (!GC_incremental || GC_incremental_at_stack_alloc);
1240 
1241       GC_incremental_at_stack_alloc = GC_incremental;
1242 #   else
1243 #     define recycle_old TRUE
1244 #   endif
1245 
1246     GC_mark_stack_too_small = FALSE;
1247     if (GC_mark_stack_size != 0) {
1248         if (new_stack != 0) {
1249           if (recycle_old) {
1250             /* Recycle old space */
1251               size_t page_offset = (word)GC_mark_stack & (GC_page_size - 1);
1252               size_t size = GC_mark_stack_size * sizeof(struct GC_ms_entry);
1253               size_t displ = 0;
1254 
1255               if (0 != page_offset) displ = GC_page_size - page_offset;
1256               size = (size - displ) & ~(GC_page_size - 1);
1257               if (size > 0) {
1258                 GC_add_to_heap((struct hblk *)
1259                                 ((word)GC_mark_stack + displ), (word)size);
1260               }
1261           }
1262           GC_mark_stack = new_stack;
1263           GC_mark_stack_size = n;
1264           /* FIXME: Do we need some way to reset GC_mark_stack_size?    */
1265           GC_mark_stack_limit = new_stack + n;
1266           GC_COND_LOG_PRINTF("Grew mark stack to %lu frames\n",
1267                              (unsigned long)GC_mark_stack_size);
1268         } else {
1269           WARN("Failed to grow mark stack to %" WARN_PRIdPTR " frames\n", n);
1270         }
1271     } else {
1272         if (new_stack == 0) {
1273             GC_err_printf("No space for mark stack\n");
1274             EXIT();
1275         }
1276         GC_mark_stack = new_stack;
1277         GC_mark_stack_size = n;
1278         GC_mark_stack_limit = new_stack + n;
1279     }
1280     GC_mark_stack_top = GC_mark_stack-1;
1281 }
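
/*
 * A worked example of the recycling arithmetic above (illustrative
 * numbers): with GC_page_size == 4096, an old stack that starts 1000
 * bytes past a page boundary and occupies 40000 bytes gives
 * page_offset == 1000, displ == 4096 - 1000 == 3096, and
 * size == (40000 - 3096) & ~4095 == 36864; i.e. the nine whole pages
 * in the middle of the old stack are handed to GC_add_to_heap, while
 * the partial pages at either end are simply abandoned.
 */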
1282 
1283 GC_INNER void GC_mark_init(void)
1284 {
1285     alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
1286 #   ifdef PARALLEL_MARK
1287         GC_exclude_static_roots_inner(GC_local_mark_stacks,
1288             (char *)GC_local_mark_stacks + sizeof(GC_local_mark_stacks));
1289 #   endif
1290 }
1291 
1292 /*
1293  * Push all locations between bottom and top onto the mark stack.
1294  * bottom is the first location to be checked; top is one past the
1295  * last location to be checked.
1296  * Should only be used if there is no possibility of mark stack
1297  * overflow.
1298  */
1299 GC_API void GC_CALL GC_push_all(char *bottom, char *top)
1300 {
1301     register word length;
1302 
1303     bottom = (char *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
1304     top = (char *)(((word) top) & ~(ALIGNMENT-1));
1305     if ((word)bottom >= (word)top) return;
1306 
1307     GC_mark_stack_top++;
1308     if ((word)GC_mark_stack_top >= (word)GC_mark_stack_limit) {
1309         ABORT("Unexpected mark stack overflow");
1310     }
1311     length = top - bottom;
1312 #   if GC_DS_TAGS > ALIGNMENT - 1
1313         length += GC_DS_TAGS;
1314         length &= ~GC_DS_TAGS;
1315 #   endif
1316     GC_mark_stack_top -> mse_start = bottom;
1317     GC_mark_stack_top -> mse_descr.w = length;
1318 }
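
/*
 * Illustrative use (hypothetical client code, not part of the
 * collector): a hook that knows about an application-managed region
 * containing GC-visible pointers could push it wholesale while roots
 * are being pushed, e.g.
 *
 *   extern char my_region_start[], my_region_end[];
 *   GC_push_all(my_region_start, my_region_end);
 *
 * The names above are made up; the caller remains responsible for the
 * "no mark stack overflow" precondition stated before the function.
 */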
1319 
1320 #ifndef GC_DISABLE_INCREMENTAL
1321 
1322   /* Analogous to the above, but push only those pages h with           */
1323   /* dirty_fn(h) != 0.  We use GC_push_all to actually push the block.  */
1324   /* Used both to selectively push dirty pages and to push a block in   */
1325   /* piecemeal fashion, to allow for more marking concurrency.          */
1326   /* Will not overflow mark stack if GC_push_all pushes a small fixed   */
1327   /* number of entries.  (This is invoked only if GC_push_all pushes    */
1328   /* a single entry, or if it marks each object before pushing it, thus */
1329   /* ensuring progress in the event of a stack overflow.)               */
1330   STATIC void GC_push_selected(ptr_t bottom, ptr_t top,
1331                                GC_bool (*dirty_fn)(struct hblk *))
1332   {
1333     struct hblk * h;
1334 
1335     bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
1336     top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));
1337     if ((word)bottom >= (word)top) return;
1338 
1339     h = HBLKPTR(bottom + HBLKSIZE);
1340     if ((word)top <= (word)h) {
1341         if ((*dirty_fn)(h-1)) {
1342             GC_push_all(bottom, top);
1343         }
1344         return;
1345     }
1346     if ((*dirty_fn)(h-1)) {
1347         GC_push_all(bottom, (ptr_t)h);
1348     }
1349 
1350     while ((word)(h+1) <= (word)top) {
1351         if ((*dirty_fn)(h)) {
1352             if ((word)(GC_mark_stack_top - GC_mark_stack)
1353                 > 3 * GC_mark_stack_size / 4) {
1354                 /* Danger of mark stack overflow */
1355                 GC_push_all((ptr_t)h, top);
1356                 return;
1357             } else {
1358                 GC_push_all((ptr_t)h, (ptr_t)(h+1));
1359             }
1360         }
1361         h++;
1362     }
1363 
1364     if ((ptr_t)h != top && (*dirty_fn)(h)) {
1365        GC_push_all((ptr_t)h, top);
1366     }
1367     if ((word)GC_mark_stack_top >= (word)GC_mark_stack_limit) {
1368         ABORT("Unexpected mark stack overflow");
1369     }
1370   }
1371 
1372   GC_API void GC_CALL GC_push_conditional(char *bottom, char *top, int all)
1373   {
1374     if (!all) {
1375       GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_dirty);
1376     } else {
1377 #     ifdef PROC_VDB
1378         if (GC_dirty_maintained) {
1379           /* Pages that were never dirtied cannot contain pointers.     */
1380           GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_ever_dirty);
1381         } else
1382 #     endif
1383       /* else */ {
1384         GC_push_all(bottom, top);
1385       }
1386     }
1387   }
1388 #else
1389   GC_API void GC_CALL GC_push_conditional(char *bottom, char *top,
1390                                           int all GC_ATTR_UNUSED)
1391   {
1392     GC_push_all(bottom, top);
1393   }
1394 #endif /* GC_DISABLE_INCREMENTAL */
1395 
1396 #if defined(MSWIN32) || defined(MSWINCE)
1397   void __cdecl GC_push_one(word p)
1398 #else
1399   void GC_push_one(word p)
1400 #endif
1401 {
1402     GC_PUSH_ONE_STACK(p, MARKED_FROM_REGISTER);
1403 }
1404 
1405 GC_API struct GC_ms_entry * GC_CALL GC_mark_and_push(void *obj,
1406                                                 mse *mark_stack_ptr,
1407                                                 mse *mark_stack_limit,
1408                                                 void ** src GC_ATTR_UNUSED)
1409 {
1410     hdr * hhdr;
1411 
1412     PREFETCH(obj);
1413     GET_HDR(obj, hhdr);
1414     if ((EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)
1415          && (!GC_all_interior_pointers
1416              || NULL == (hhdr = GC_find_header(GC_base(obj)))))
1417         || EXPECT(HBLK_IS_FREE(hhdr), FALSE)) {
1418       GC_ADD_TO_BLACK_LIST_NORMAL(obj, (ptr_t)src);
1419       return mark_stack_ptr;
1420     }
1421 
1422     PUSH_CONTENTS_HDR(obj, mark_stack_ptr /* modified */, mark_stack_limit,
1423                       (ptr_t)src, was_marked, hhdr, TRUE);
1424  was_marked:
1425     return mark_stack_ptr;
1426 }
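
/*
 * Illustrative sketch of a client mark procedure (hypothetical names
 * and field layout; see gc_mark.h for the real interface): such a
 * procedure threads mark_stack_ptr through GC_mark_and_push for each
 * field that may hold a heap pointer and returns the updated pointer:
 *
 *   struct GC_ms_entry *my_mark_proc(GC_word *addr,
 *                                    struct GC_ms_entry *msp,
 *                                    struct GC_ms_entry *msp_lim,
 *                                    GC_word env)
 *   {
 *     void **fields = (void **)addr;
 *     msp = GC_mark_and_push(fields[0], msp, msp_lim, &fields[0]);
 *     msp = GC_mark_and_push(fields[2], msp, msp_lim, &fields[2]);
 *     return msp;
 *   }
 *
 * Clients will usually prefer the GC_MARK_AND_PUSH() wrapper in
 * gc_mark.h, which first checks that the value plausibly points into
 * the heap before calling this function.
 */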
1427 
1428 #if defined(MANUAL_VDB) && defined(THREADS)
1429   void GC_dirty(ptr_t p);
1430 #endif
1431 
1432 /* Mark and push (i.e. gray) a single object p onto the main    */
1433 /* mark stack.  Consider p to be valid if it is an interior     */
1434 /* pointer.                                                     */
1435 /* The object p has passed a preliminary pointer validity       */
1436 /* test, but we do not definitely know whether it is valid.     */
1437 /* Mark bits are NOT atomically updated.  Thus this must be the */
1438 /* only thread setting them.                                    */
1439 # if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
1440     GC_INNER void GC_mark_and_push_stack(ptr_t p, ptr_t source)
1441 # else
1442     GC_INNER void GC_mark_and_push_stack(ptr_t p)
1443 #   define source ((ptr_t)0)
1444 # endif
1445 {
1446     hdr * hhdr;
1447     ptr_t r = p;
1448 
1449     PREFETCH(p);
1450     GET_HDR(p, hhdr);
1451     if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)) {
1452         if (hhdr != 0) {
1453           r = GC_base(p);
1454           hhdr = HDR(r);
1455         }
1456         if (hhdr == 0) {
1457             GC_ADD_TO_BLACK_LIST_STACK(p, source);
1458             return;
1459         }
1460     }
1461     if (EXPECT(HBLK_IS_FREE(hhdr), FALSE)) {
1462         GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
1463         return;
1464     }
1465 #   if defined(MANUAL_VDB) && defined(THREADS)
1466       /* Pointer is on the stack.  We may have dirtied the object       */
1467       /* it points to, but may not yet have called GC_dirty().          */
1468       GC_dirty(p);      /* Implicitly affects entire object.            */
1469 #   endif
1470     PUSH_CONTENTS_HDR(r, GC_mark_stack_top, GC_mark_stack_limit,
1471                       source, mark_and_push_exit, hhdr, FALSE);
1472   mark_and_push_exit: ;
1473     /* We silently ignore pointers to near the end of a block,  */
1474     /* which is very mildly suboptimal.                         */
1475     /* FIXME: We should probably add a header word to address   */
1476     /* this.                                                    */
1477 }
1478 # undef source
1479 
1480 # ifdef TRACE_BUF
1481 
1482 # define TRACE_ENTRIES 1000
1483 
1484 struct trace_entry {
1485     char * kind;
1486     word gc_no;
1487     word bytes_allocd;
1488     word arg1;
1489     word arg2;
1490 } GC_trace_buf[TRACE_ENTRIES];
1491 
1492 int GC_trace_buf_ptr = 0;
1493 
1494 void GC_add_trace_entry(char *kind, word arg1, word arg2)
1495 {
1496     GC_trace_buf[GC_trace_buf_ptr].kind = kind;
1497     GC_trace_buf[GC_trace_buf_ptr].gc_no = GC_gc_no;
1498     GC_trace_buf[GC_trace_buf_ptr].bytes_allocd = GC_bytes_allocd;
1499     GC_trace_buf[GC_trace_buf_ptr].arg1 = arg1 ^ 0x80000000;
1500     GC_trace_buf[GC_trace_buf_ptr].arg2 = arg2 ^ 0x80000000;
1501     GC_trace_buf_ptr++;
1502     if (GC_trace_buf_ptr >= TRACE_ENTRIES) GC_trace_buf_ptr = 0;
1503 }
1504 
1505 void GC_print_trace_inner(word gc_no)
1506 {
1507     int i;
1508     struct trace_entry *p;
1509 
1510     for (i = GC_trace_buf_ptr-1; i != GC_trace_buf_ptr; i--) {
1511         if (i < 0) i = TRACE_ENTRIES-1;
1512         p = GC_trace_buf + i;
1513         if (p -> gc_no < gc_no || p -> kind == 0) {
1514             return;
1515         }
1516         GC_printf("Trace:%s (gc:%u, bytes:%lu) 0x%lX, 0x%lX\n",
1517                   p -> kind, (unsigned)p -> gc_no,
1518                   (unsigned long)p -> bytes_allocd,
1519                   (long)p->arg1 ^ 0x80000000L, (long)p->arg2 ^ 0x80000000L);
1520     }
1521     GC_printf("Trace incomplete\n");
1522 }
1523 
1524 void GC_print_trace(word gc_no)
1525 {
1526     DCL_LOCK_STATE;
1527 
1528     LOCK();
1529     GC_print_trace_inner(gc_no);
1530     UNLOCK();
1531 }
1532 
1533 # endif /* TRACE_BUF */
1534 
1535 /*
1536  * A version of GC_push_all that treats all interior pointers as valid
1537  * and scans the entire region immediately, in case the contents
1538  * change.
1539  */
1540 GC_INNER void GC_push_all_eager(ptr_t bottom, ptr_t top)
1541 {
1542     word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
1543     word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
1544     register word *p;
1545     register word q;
1546     register word *lim;
1547     register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1548     register ptr_t least_ha = GC_least_plausible_heap_addr;
1549 #   define GC_greatest_plausible_heap_addr greatest_ha
1550 #   define GC_least_plausible_heap_addr least_ha
1551 
1552     if (top == 0) return;
1553     /* check all pointers in range and push if they appear      */
1554     /* to be valid.                                             */
1555       lim = t - 1 /* longword */;
1556       for (p = b; (word)p <= (word)lim;
1557            p = (word *)(((ptr_t)p) + ALIGNMENT)) {
1558         q = *p;
1559         GC_PUSH_ONE_STACK(q, p);
1560       }
1561 #   undef GC_greatest_plausible_heap_addr
1562 #   undef GC_least_plausible_heap_addr
1563 }
1564 
1565 GC_INNER void GC_push_all_stack(ptr_t bottom, ptr_t top)
1566 {
1567 # if defined(THREADS) && defined(MPROTECT_VDB)
1568     GC_push_all_eager(bottom, top);
1569 # else
1570     if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
1571       GC_push_all(bottom, top);
1572     } else {
1573       GC_push_all_eager(bottom, top);
1574     }
1575 # endif
1576 }
1577 
1578 #if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
1579     defined(MARK_BIT_PER_GRANULE)
1580 # if GC_GRANULE_WORDS == 1
1581 #   define USE_PUSH_MARKED_ACCELERATORS
1582 #   define PUSH_GRANULE(q) \
1583                 do { \
1584                   word qcontents = (q)[0]; \
1585                   GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
1586                 } while (0)
1587 # elif GC_GRANULE_WORDS == 2
1588 #   define USE_PUSH_MARKED_ACCELERATORS
1589 #   define PUSH_GRANULE(q) \
1590                 do { \
1591                   word qcontents = (q)[0]; \
1592                   GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
1593                   qcontents = (q)[1]; \
1594                   GC_PUSH_ONE_HEAP(qcontents, (q)+1, GC_mark_stack_top); \
1595                 } while (0)
1596 # elif GC_GRANULE_WORDS == 4
1597 #   define USE_PUSH_MARKED_ACCELERATORS
1598 #   define PUSH_GRANULE(q) \
1599                 do { \
1600                   word qcontents = (q)[0]; \
1601                   GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
1602                   qcontents = (q)[1]; \
1603                   GC_PUSH_ONE_HEAP(qcontents, (q)+1, GC_mark_stack_top); \
1604                   qcontents = (q)[2]; \
1605                   GC_PUSH_ONE_HEAP(qcontents, (q)+2, GC_mark_stack_top); \
1606                   qcontents = (q)[3]; \
1607                   GC_PUSH_ONE_HEAP(qcontents, (q)+3, GC_mark_stack_top); \
1608                 } while (0)
1609 # endif
1610 #endif /* !SMALL_CONFIG && !USE_MARK_BYTES && MARK_BIT_PER_GRANULE */
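
/*
 * Illustrative reading of the accelerators above (assuming, say,
 * GC_GRANULE_WORDS == 2, i.e. 16-byte granules on a 64-bit target):
 * bit i of a mark word corresponds to the granule i*GC_GRANULE_WORDS
 * words into the region that the mark word covers; a set bit marks the
 * start of a live object, and PUSH_GRANULE(q) feeds each word of the
 * granule at q to GC_PUSH_ONE_HEAP as a candidate pointer.  The loops
 * below walk the mark words of a block, shifting each word right by
 * the object size in granules after every object examined.
 */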
1611 
1612 #ifdef USE_PUSH_MARKED_ACCELERATORS
1613 /* Push all objects reachable from marked objects in the given block */
1614 /* containing objects of size 1 granule.                             */
1615 STATIC void GC_push_marked1(struct hblk *h, hdr *hhdr)
1616 {
1617     word * mark_word_addr = &(hhdr->hb_marks[0]);
1618     word *p;
1619     word *plim;
1620     word *q;
1621     word mark_word;
1622 
1623     /* Allow registers to be used for some frequently accessed  */
1624     /* global variables.  Otherwise aliasing issues are likely  */
1625     /* to prevent that.                                         */
1626     ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1627     ptr_t least_ha = GC_least_plausible_heap_addr;
1628     mse * mark_stack_top = GC_mark_stack_top;
1629     mse * mark_stack_limit = GC_mark_stack_limit;
1630 
1631 #   undef GC_mark_stack_top
1632 #   undef GC_mark_stack_limit
1633 #   define GC_mark_stack_top mark_stack_top
1634 #   define GC_mark_stack_limit mark_stack_limit
1635 #   define GC_greatest_plausible_heap_addr greatest_ha
1636 #   define GC_least_plausible_heap_addr least_ha
1637 
1638     p = (word *)(h->hb_body);
1639     plim = (word *)(((word)h) + HBLKSIZE);
1640 
1641     /* go through all words in block */
1642         while ((word)p < (word)plim) {
1643             mark_word = *mark_word_addr++;
1644             q = p;
1645             while(mark_word != 0) {
1646               if (mark_word & 1) {
1647                   PUSH_GRANULE(q);
1648               }
1649               q += GC_GRANULE_WORDS;
1650               mark_word >>= 1;
1651             }
1652             p += WORDSZ*GC_GRANULE_WORDS;
1653         }
1654 
1655 #   undef GC_greatest_plausible_heap_addr
1656 #   undef GC_least_plausible_heap_addr
1657 #   undef GC_mark_stack_top
1658 #   undef GC_mark_stack_limit
1659 #   define GC_mark_stack_limit GC_arrays._mark_stack_limit
1660 #   define GC_mark_stack_top GC_arrays._mark_stack_top
1661     GC_mark_stack_top = mark_stack_top;
1662 }
1663 
1664 
1665 #ifndef UNALIGNED_PTRS
1666 
1667 /* Push all objects reachable from marked objects in the given block */
1668 /* containing objects of size 2 granules.                            */
1669 STATIC void GC_push_marked2(struct hblk *h, hdr *hhdr)
1670 {
1671     word * mark_word_addr = &(hhdr->hb_marks[0]);
1672     word *p;
1673     word *plim;
1674     word *q;
1675     word mark_word;
1676 
1677     ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1678     ptr_t least_ha = GC_least_plausible_heap_addr;
1679     mse * mark_stack_top = GC_mark_stack_top;
1680     mse * mark_stack_limit = GC_mark_stack_limit;
1681 
1682 #   undef GC_mark_stack_top
1683 #   undef GC_mark_stack_limit
1684 #   define GC_mark_stack_top mark_stack_top
1685 #   define GC_mark_stack_limit mark_stack_limit
1686 #   define GC_greatest_plausible_heap_addr greatest_ha
1687 #   define GC_least_plausible_heap_addr least_ha
1688 
1689     p = (word *)(h->hb_body);
1690     plim = (word *)(((word)h) + HBLKSIZE);
1691 
1692     /* go through all words in block */
1693         while ((word)p < (word)plim) {
1694             mark_word = *mark_word_addr++;
1695             q = p;
1696             while(mark_word != 0) {
1697               if (mark_word & 1) {
1698                   PUSH_GRANULE(q);
1699                   PUSH_GRANULE(q + GC_GRANULE_WORDS);
1700               }
1701               q += 2 * GC_GRANULE_WORDS;
1702               mark_word >>= 2;
1703             }
1704             p += WORDSZ*GC_GRANULE_WORDS;
1705         }
1706 
1707 #   undef GC_greatest_plausible_heap_addr
1708 #   undef GC_least_plausible_heap_addr
1709 #   undef GC_mark_stack_top
1710 #   undef GC_mark_stack_limit
1711 #   define GC_mark_stack_limit GC_arrays._mark_stack_limit
1712 #   define GC_mark_stack_top GC_arrays._mark_stack_top
1713     GC_mark_stack_top = mark_stack_top;
1714 }
1715 
1716 # if GC_GRANULE_WORDS < 4
1717 /* Push all objects reachable from marked objects in the given block */
1718 /* containing objects of size 4 granules.                            */
1719 /* There is a risk of mark stack overflow here, but we handle that;  */
1720 /* since only unmarked referents get pushed, it is not very likely.  */
1721 STATIC void GC_push_marked4(struct hblk *h, hdr *hhdr)
1722 {
1723     word * mark_word_addr = &(hhdr->hb_marks[0]);
1724     word *p;
1725     word *plim;
1726     word *q;
1727     word mark_word;
1728 
1729     ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1730     ptr_t least_ha = GC_least_plausible_heap_addr;
1731     mse * mark_stack_top = GC_mark_stack_top;
1732     mse * mark_stack_limit = GC_mark_stack_limit;
1733 
1734 #   undef GC_mark_stack_top
1735 #   undef GC_mark_stack_limit
1736 #   define GC_mark_stack_top mark_stack_top
1737 #   define GC_mark_stack_limit mark_stack_limit
1738 #   define GC_greatest_plausible_heap_addr greatest_ha
1739 #   define GC_least_plausible_heap_addr least_ha
1740 
1741     p = (word *)(h->hb_body);
1742     plim = (word *)(((word)h) + HBLKSIZE);
1743 
1744     /* go through all words in block */
1745         while ((word)p < (word)plim) {
1746             mark_word = *mark_word_addr++;
1747             q = p;
1748             while(mark_word != 0) {
1749               if (mark_word & 1) {
1750                   PUSH_GRANULE(q);
1751                   PUSH_GRANULE(q + GC_GRANULE_WORDS);
1752                   PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
1753                   PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
1754               }
1755               q += 4 * GC_GRANULE_WORDS;
1756               mark_word >>= 4;
1757             }
1758             p += WORDSZ*GC_GRANULE_WORDS;
1759         }
1760 #   undef GC_greatest_plausible_heap_addr
1761 #   undef GC_least_plausible_heap_addr
1762 #   undef GC_mark_stack_top
1763 #   undef GC_mark_stack_limit
1764 #   define GC_mark_stack_limit GC_arrays._mark_stack_limit
1765 #   define GC_mark_stack_top GC_arrays._mark_stack_top
1766     GC_mark_stack_top = mark_stack_top;
1767 }
1768 
1769 #endif /* GC_GRANULE_WORDS < 4 */
1770 
1771 #endif /* UNALIGNED_PTRS */
1772 
1773 #endif /* USE_PUSH_MARKED_ACCELERATORS */
1774 
1775 /* Push all objects reachable from marked objects in the given block */
1776 STATIC void GC_push_marked(struct hblk *h, hdr *hhdr)
1777 {
1778     size_t sz = hhdr -> hb_sz;
1779     word descr = hhdr -> hb_descr;
1780     ptr_t p;
1781     word bit_no;
1782     ptr_t lim;
1783     mse * GC_mark_stack_top_reg;
1784     mse * mark_stack_limit = GC_mark_stack_limit;
1785 
1786     /* Some quick shortcuts: */
1787         if ((/* 0 | */ GC_DS_LENGTH) == descr) return;
1788         if (GC_block_empty(hhdr)/* nothing marked */) return;
1789     GC_n_rescuing_pages++;
1790     GC_objects_are_marked = TRUE;
1791     if (sz > MAXOBJBYTES) {
1792         lim = h -> hb_body;
1793     } else {
1794         lim = (h + 1)->hb_body - sz;
1795     }
1796 
1797     switch(BYTES_TO_GRANULES(sz)) {
1798 #   if defined(USE_PUSH_MARKED_ACCELERATORS)
1799      case 1:
1800        GC_push_marked1(h, hhdr);
1801        break;
1802 #    if !defined(UNALIGNED_PTRS)
1803        case 2:
1804          GC_push_marked2(h, hhdr);
1805          break;
1806 #     if GC_GRANULE_WORDS < 4
1807        case 4:
1808          GC_push_marked4(h, hhdr);
1809          break;
1810 #     endif
1811 #    endif
1812 #   endif
1813      default:
1814       GC_mark_stack_top_reg = GC_mark_stack_top;
1815       for (p = h -> hb_body, bit_no = 0; (word)p <= (word)lim;
1816            p += sz, bit_no += MARK_BIT_OFFSET(sz)) {
1817          if (mark_bit_from_hdr(hhdr, bit_no)) {
1818            /* Mark from fields inside the object */
1819              PUSH_OBJ(p, hhdr, GC_mark_stack_top_reg, mark_stack_limit);
1820          }
1821       }
1822       GC_mark_stack_top = GC_mark_stack_top_reg;
1823     }
1824 }
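
/*
 * A note on the lim computation above (illustrative numbers): for
 * small objects, lim = (h + 1)->hb_body - sz is the last address at
 * which a whole object still fits in the block; e.g. with a 4 KiB
 * block and sz == 1000 the loop visits offsets 0, 1000, 2000 and 3000
 * and stops before 4000, which would run past the block end.  For
 * objects larger than MAXOBJBYTES the block holds a single object, so
 * lim is the block start and the loop body executes exactly once.
 */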
1825 
1826 #ifdef ENABLE_DISCLAIM
1827 /* Unconditionally mark from all objects which have not been reclaimed. */
1828 /* This is useful in order to retain pointers which are reachable from  */
1829 /* the disclaim notifiers.                                              */
1830 /*                                                                      */
1831 /* To determine whether an object has been reclaimed, we require that   */
1832 /* any live object has a non-zero value in one of the two lowest bits   */
1833 /* of its first word.  A reclaimed object, on the other hand, is a      */
1834 /* member of a free list, and thus contains a word-aligned next-pointer */
1835 /* as its first word, whose two lowest bits are consequently zero.      */
1836  STATIC void GC_push_unconditionally(struct hblk *h, hdr *hhdr)
1837  {
1838     size_t sz = hhdr -> hb_sz;
1839     word descr = hhdr -> hb_descr;
1840     ptr_t p;
1841     ptr_t lim;
1842     mse * GC_mark_stack_top_reg;
1843     mse * mark_stack_limit = GC_mark_stack_limit;
1844 
1845     if ((/* 0 | */ GC_DS_LENGTH) == descr)
1846         return;
1847 
1848     GC_n_rescuing_pages++;
1849     GC_objects_are_marked = TRUE;
1850     if (sz > MAXOBJBYTES)
1851         lim = h -> hb_body;
1852     else
1853         lim = (h + 1)->hb_body - sz;
1854 
1855     GC_mark_stack_top_reg = GC_mark_stack_top;
1856     for (p = h -> hb_body; (word)p <= (word)lim; p += sz)
1857         if ((*(GC_word *)p & 0x3) != 0)
1858             PUSH_OBJ(p, hhdr, GC_mark_stack_top_reg, mark_stack_limit);
1859     GC_mark_stack_top = GC_mark_stack_top_reg;
1860   }
1861 #endif /* ENABLE_DISCLAIM */
1862 
1863 #ifndef GC_DISABLE_INCREMENTAL
1864   /* Test whether any page in the given block is dirty.   */
1865   STATIC GC_bool GC_block_was_dirty(struct hblk *h, hdr *hhdr)
1866   {
1867     size_t sz = hhdr -> hb_sz;
1868 
1869     if (sz <= MAXOBJBYTES) {
1870          return(GC_page_was_dirty(h));
1871     } else {
1872          ptr_t p = (ptr_t)h;
1873          while ((word)p < (word)h + sz) {
1874              if (GC_page_was_dirty((struct hblk *)p)) return(TRUE);
1875              p += HBLKSIZE;
1876          }
1877          return(FALSE);
1878     }
1879   }
1880 #endif /* !GC_DISABLE_INCREMENTAL */
1881 
1882 /* Similar to GC_push_marked, but skip over unallocated blocks  */
1883 /* and return address of next plausible block.                  */
1884 STATIC struct hblk * GC_push_next_marked(struct hblk *h)
1885 {
1886     hdr * hhdr = HDR(h);
1887 
1888     if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr), FALSE)) {
1889       h = GC_next_used_block(h);
1890       if (h == 0) return(0);
1891       hhdr = GC_find_header((ptr_t)h);
1892     }
1893     GC_push_marked(h, hhdr);
1894     return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
1895 }
1896 
1897 #ifndef GC_DISABLE_INCREMENTAL
1898   /* Identical to above, but mark only from dirty pages   */
1899   STATIC struct hblk * GC_push_next_marked_dirty(struct hblk *h)
1900   {
1901     hdr * hhdr = HDR(h);
1902 
1903     if (!GC_dirty_maintained) ABORT("Dirty bits not set up");
1904     for (;;) {
1905         if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr)
1906                    || HBLK_IS_FREE(hhdr), FALSE)) {
1907           h = GC_next_used_block(h);
1908           if (h == 0) return(0);
1909           hhdr = GC_find_header((ptr_t)h);
1910         }
1911 #       ifdef STUBBORN_ALLOC
1912           if (hhdr -> hb_obj_kind == STUBBORN) {
1913             if (GC_page_was_changed(h) && GC_block_was_dirty(h, hhdr)) {
1914                 break;
1915             }
1916           } else {
1917             if (GC_block_was_dirty(h, hhdr)) break;
1918           }
1919 #       else
1920           if (GC_block_was_dirty(h, hhdr)) break;
1921 #       endif
1922         h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
1923         hhdr = HDR(h);
1924     }
1925     GC_push_marked(h, hhdr);
1926     return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
1927   }
1928 #endif /* !GC_DISABLE_INCREMENTAL */
1929 
1930 /* Similar to above, but for uncollectible pages.  Needed since we      */
1931 /* do not clear marks for such pages, even for full collections.        */
1932 STATIC struct hblk * GC_push_next_marked_uncollectable(struct hblk *h)
1933 {
1934     hdr * hhdr = HDR(h);
1935 
1936     for (;;) {
1937         if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr)
1938                    || HBLK_IS_FREE(hhdr), FALSE)) {
1939           h = GC_next_used_block(h);
1940           if (h == 0) return(0);
1941           hhdr = GC_find_header((ptr_t)h);
1942         }
1943         if (hhdr -> hb_obj_kind == UNCOLLECTABLE) {
1944             GC_push_marked(h, hhdr);
1945             break;
1946         }
1947 #       ifdef ENABLE_DISCLAIM
1948             if ((hhdr -> hb_flags & MARK_UNCONDITIONALLY) != 0) {
1949                 GC_push_unconditionally(h, hhdr);
1950                 break;
1951             }
1952 #       endif
1953         h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
1954         hhdr = HDR(h);
1955     }
1956     return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
1957 }
1958