
/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose,  provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 */


# include <stdio.h>
# include "private/gc_pmark.h"

#if defined(MSWIN32) && defined(__GNUC__)
# include <excpt.h>
#endif

/* We put this here to minimize the risk of inlining. */
/*VARARGS*/
#ifdef __WATCOMC__
  void GC_noop(void *p, ...) {}
#else
  void GC_noop() {}
#endif

/* Single argument version, robust against whole program analysis. */
void GC_noop1(word x)
{
    static volatile word sink;

    sink = x;
}
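
/* Illustrative sketch, not collector code: because the compiler cannot	*/
/* prove that GC_noop1 ignores its argument, a client can pass a pointer	*/
/* through it to keep that pointer visible to a conservative scan up to	*/
/* the call.  The function below is hypothetical.				*/
#if 0
static void example_keep_live(void)
{
    void *p = GC_malloc(16);

    /* ... code that may retain only a disguised copy of p ... */
    GC_noop1((word)p);	/* p remains a root at least until here */
}
#endif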

/* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */

unsigned GC_n_mark_procs = GC_RESERVED_MARK_PROCS;

/* Initialize GC_obj_kinds and the standard free lists.		*/
/* This must be done statically, since they may be accessed before	*/
/* GC_init is called.							*/
/* It's done here, since we need to deal with mark descriptors.	*/
struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
/* PTRFREE */ { &GC_aobjfreelist[0], 0 /* filled in dynamically */,
		0 | GC_DS_LENGTH, FALSE, FALSE },
/* NORMAL  */ { &GC_objfreelist[0], 0,
		0 | GC_DS_LENGTH,  /* Adjusted in GC_init_inner for EXTRA_BYTES */
		TRUE /* add length to descr */, TRUE },
/* UNCOLLECTABLE */
	      { &GC_uobjfreelist[0], 0,
		0 | GC_DS_LENGTH, TRUE /* add length to descr */, TRUE },
# ifdef ATOMIC_UNCOLLECTABLE
   /* AUNCOLLECTABLE */
	      { &GC_auobjfreelist[0], 0,
		0 | GC_DS_LENGTH, FALSE /* add length to descr */, FALSE },
# endif
# ifdef STUBBORN_ALLOC
/*STUBBORN*/ { &GC_sobjfreelist[0], 0,
		0 | GC_DS_LENGTH, TRUE /* add length to descr */, TRUE },
# endif
};
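
/* A worked example of the length descriptors above (a sketch; the	*/
/* per-block adjustment happens when block headers are set up): for	*/
/* kinds with "add length to descr" set, each object's descriptor	*/
/* becomes its byte size tagged with GC_DS_LENGTH, so a 32-byte NORMAL	*/
/* object is described simply as "scan 32 bytes for pointers", while a	*/
/* PTRFREE object keeps descriptor 0, i.e. "contains no pointers".	*/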

# ifdef ATOMIC_UNCOLLECTABLE
#   ifdef STUBBORN_ALLOC
      unsigned GC_n_kinds = 5;
#   else
      unsigned GC_n_kinds = 4;
#   endif
# else
#   ifdef STUBBORN_ALLOC
      unsigned GC_n_kinds = 4;
#   else
      unsigned GC_n_kinds = 3;
#   endif
# endif


# ifndef INITIAL_MARK_STACK_SIZE
#   define INITIAL_MARK_STACK_SIZE (1*HBLKSIZE)
		/* INITIAL_MARK_STACK_SIZE * sizeof(mse) should be a 	*/
		/* multiple of HBLKSIZE.				*/
		/* The incremental collector actually likes a larger	*/
		/* size, since it wants to push all marked dirty objs	*/
		/* before marking anything new.  Currently we let it	*/
		/* grow dynamically.					*/
# endif

/*
 * Limits of stack for GC_mark routine.
 * All ranges between GC_mark_stack(incl.) and GC_mark_stack_top(incl.) still
 * need to be marked from.
 */

word GC_n_rescuing_pages;	/* Number of dirty pages we marked from */
				/* excludes ptrfree pages, etc.		*/

mse * GC_mark_stack;

mse * GC_mark_stack_limit;

size_t GC_mark_stack_size = 0;

#ifdef PARALLEL_MARK
# include "atomic_ops.h"

  mse * volatile GC_mark_stack_top;
  /* Updated only with mark lock held, but read asynchronously.	*/
  volatile AO_t GC_first_nonempty;
  	/* Lowest entry on mark stack	*/
	/* that may be nonempty.	*/
	/* Updated only by initiating 	*/
	/* thread.			*/
#else
  mse * GC_mark_stack_top;
#endif

static struct hblk * scan_ptr;

mark_state_t GC_mark_state = MS_NONE;

GC_bool GC_mark_stack_too_small = FALSE;

GC_bool GC_objects_are_marked = FALSE;	/* Are there collectable marked	*/
					/* objects in the heap?		*/

/* Is a collection in progress?  Note that this can return true in the	*/
/* nonincremental case, if a collection has been abandoned and the	*/
/* mark state is now MS_INVALID.					*/
GC_bool GC_collection_in_progress(void)
{
    return(GC_mark_state != MS_NONE);
}

/* clear all mark bits in the header */
void GC_clear_hdr_marks(hdr *hhdr)
{
    size_t last_bit = FINAL_MARK_BIT(hhdr -> hb_sz);

#   ifdef USE_MARK_BYTES
      BZERO(hhdr -> hb_marks, MARK_BITS_SZ);
      hhdr -> hb_marks[last_bit] = 1;
#   else
      BZERO(hhdr -> hb_marks, MARK_BITS_SZ*sizeof(word));
      set_mark_bit_from_hdr(hhdr, last_bit);
#   endif
    hhdr -> hb_n_marks = 0;
}

/* Set all mark bits in the header.  Used for uncollectable blocks. */
void GC_set_hdr_marks(hdr *hhdr)
{
    unsigned i;
    size_t sz = hhdr -> hb_sz;
    size_t n_marks = FINAL_MARK_BIT(sz);

#   ifdef USE_MARK_BYTES
      for (i = 0; i <= n_marks; i += MARK_BIT_OFFSET(sz)) {
    	hhdr -> hb_marks[i] = 1;
      }
#   else
      for (i = 0; i < divWORDSZ(n_marks + WORDSZ); ++i) {
    	hhdr -> hb_marks[i] = ONES;
      }
#   endif
#   ifdef MARK_BIT_PER_OBJ
      hhdr -> hb_n_marks = n_marks - 1;
#   else
      hhdr -> hb_n_marks = HBLK_OBJS(sz);
#   endif
}

/*
 * Clear all mark bits associated with block h.
 */
/*ARGSUSED*/
static void clear_marks_for_block(struct hblk *h, word dummy)
{
    register hdr * hhdr = HDR(h);

    if (IS_UNCOLLECTABLE(hhdr -> hb_obj_kind)) return;
        /* Mark bit for these is cleared only once the object is 	*/
        /* explicitly deallocated.  This either frees the block, or	*/
        /* the bit is cleared once the object is on the free list.	*/
    GC_clear_hdr_marks(hhdr);
}

/* Slow but general routines for setting/clearing/asking about mark bits */
void GC_set_mark_bit(ptr_t p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);

    if (!mark_bit_from_hdr(hhdr, bit_no)) {
      set_mark_bit_from_hdr(hhdr, bit_no);
      ++hhdr -> hb_n_marks;
    }
}

void GC_clear_mark_bit(ptr_t p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);

    if (mark_bit_from_hdr(hhdr, bit_no)) {
      size_t n_marks;
      clear_mark_bit_from_hdr(hhdr, bit_no);
      n_marks = hhdr -> hb_n_marks - 1;
#     ifdef PARALLEL_MARK
        if (n_marks != 0)
          hhdr -> hb_n_marks = n_marks;
        /* Don't decrement to zero.  The counts are approximate due to	*/
        /* concurrency issues, but we need to ensure that a count of 	*/
        /* zero implies an empty block.					*/
#     else
          hhdr -> hb_n_marks = n_marks;
#     endif
    }
}

GC_bool GC_is_marked(ptr_t p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);

    return((GC_bool)mark_bit_from_hdr(hhdr, bit_no));
}


/*
 * Clear mark bits in all allocated heap blocks.  This invalidates
 * the marker invariant, and sets GC_mark_state to reflect this.
 * (This implicitly starts marking to reestablish the invariant.)
 */
void GC_clear_marks(void)
{
    GC_apply_to_all_blocks(clear_marks_for_block, (word)0);
    GC_objects_are_marked = FALSE;
    GC_mark_state = MS_INVALID;
    scan_ptr = 0;
}

/* Initiate a garbage collection.  Initiates a full collection if the	*/
/* mark state is invalid.						*/
/*ARGSUSED*/
void GC_initiate_gc(void)
{
    if (GC_dirty_maintained) GC_read_dirty();
#   ifdef STUBBORN_ALLOC
    	GC_read_changed();
#   endif
#   ifdef CHECKSUMS
	{
	    extern void GC_check_dirty();

	    if (GC_dirty_maintained) GC_check_dirty();
	}
#   endif
    GC_n_rescuing_pages = 0;
    if (GC_mark_state == MS_NONE) {
        GC_mark_state = MS_PUSH_RESCUERS;
    } else if (GC_mark_state != MS_INVALID) {
    	ABORT("unexpected state");
    } /* else this is really a full collection, and mark	*/
      /* bits are invalid.					*/
    scan_ptr = 0;
}


static void alloc_mark_stack(size_t);

# if defined(MSWIN32) || defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS)
    /* Under rare conditions, we may end up marking from nonexistent memory. */
    /* Hence we need to be prepared to recover by running GC_mark_some	     */
    /* with a suitable handler in place.				     */
#   define WRAP_MARK_SOME
# endif

/* Perform a small amount of marking.			*/
/* We try to touch roughly a page of memory.		*/
/* Return TRUE if we just finished a mark phase.	*/
/* Cold_gc_frame is an address inside a GC frame that	*/
/* remains valid until all marking is complete.		*/
/* A zero value indicates that it's OK to miss some	*/
/* register values.					*/
/* We hold the allocation lock.  In the case of 	*/
/* incremental collection, the world may not be stopped.*/
#ifdef WRAP_MARK_SOME
  /* For win32, this is called after we establish a structured	*/
  /* exception handler, in case Windows unmaps one of our root	*/
  /* segments.  See below.  In either case, we acquire the 	*/
  /* allocator lock long before we get here.			*/
  GC_bool GC_mark_some_inner(ptr_t cold_gc_frame)
#else
  GC_bool GC_mark_some(ptr_t cold_gc_frame)
#endif
{
    switch(GC_mark_state) {
    	case MS_NONE:
    	    return(FALSE);

    	case MS_PUSH_RESCUERS:
    	    if (GC_mark_stack_top
    	        >= GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/2) {
		/* Go ahead and mark, even though that might cause us to */
		/* see more marked dirty objects later on.  Avoid this	 */
		/* in the future.					 */
		GC_mark_stack_too_small = TRUE;
    	        MARK_FROM_MARK_STACK();
    	        return(FALSE);
    	    } else {
    	        scan_ptr = GC_push_next_marked_dirty(scan_ptr);
    	        if (scan_ptr == 0) {
		    if (GC_print_stats) {
			GC_log_printf("Marked from %u dirty pages\n",
				      GC_n_rescuing_pages);
		    }
    	    	    GC_push_roots(FALSE, cold_gc_frame);
    	    	    GC_objects_are_marked = TRUE;
    	    	    if (GC_mark_state != MS_INVALID) {
    	    	        GC_mark_state = MS_ROOTS_PUSHED;
    	    	    }
    	    	}
    	    }
    	    return(FALSE);

    	case MS_PUSH_UNCOLLECTABLE:
    	    if (GC_mark_stack_top
    	        >= GC_mark_stack + GC_mark_stack_size/4) {
#		ifdef PARALLEL_MARK
		  /* Avoid this, since we don't parallelize the marker	*/
		  /* here.						*/
		  if (GC_parallel) GC_mark_stack_too_small = TRUE;
#		endif
    	        MARK_FROM_MARK_STACK();
    	        return(FALSE);
    	    } else {
    	        scan_ptr = GC_push_next_marked_uncollectable(scan_ptr);
    	        if (scan_ptr == 0) {
    	    	    GC_push_roots(TRUE, cold_gc_frame);
    	    	    GC_objects_are_marked = TRUE;
    	    	    if (GC_mark_state != MS_INVALID) {
    	    	        GC_mark_state = MS_ROOTS_PUSHED;
    	    	    }
    	    	}
    	    }
    	    return(FALSE);

    	case MS_ROOTS_PUSHED:
#	    ifdef PARALLEL_MARK
	      /* In the incremental GC case, this currently doesn't	*/
	      /* quite do the right thing, since it runs to		*/
	      /* completion.  On the other hand, starting a		*/
	      /* parallel marker is expensive, so perhaps it is		*/
	      /* the right thing?					*/
	      /* Eventually, incremental marking should run		*/
	      /* asynchronously in multiple threads, without grabbing	*/
	      /* the allocation lock.					*/
	        if (GC_parallel) {
		  GC_do_parallel_mark();
		  GC_ASSERT(GC_mark_stack_top < (mse *)GC_first_nonempty);
		  GC_mark_stack_top = GC_mark_stack - 1;
    	          if (GC_mark_stack_too_small) {
    	            alloc_mark_stack(2*GC_mark_stack_size);
    	          }
		  if (GC_mark_state == MS_ROOTS_PUSHED) {
    	            GC_mark_state = MS_NONE;
    	            return(TRUE);
		  } else {
		    return(FALSE);
	          }
		}
#	    endif
    	    if (GC_mark_stack_top >= GC_mark_stack) {
    	        MARK_FROM_MARK_STACK();
    	        return(FALSE);
    	    } else {
    	        GC_mark_state = MS_NONE;
    	        if (GC_mark_stack_too_small) {
    	            alloc_mark_stack(2*GC_mark_stack_size);
    	        }
    	        return(TRUE);
    	    }

    	case MS_INVALID:
    	case MS_PARTIALLY_INVALID:
	    if (!GC_objects_are_marked) {
		GC_mark_state = MS_PUSH_UNCOLLECTABLE;
		return(FALSE);
	    }
    	    if (GC_mark_stack_top >= GC_mark_stack) {
    	        MARK_FROM_MARK_STACK();
    	        return(FALSE);
    	    }
    	    if (scan_ptr == 0 && GC_mark_state == MS_INVALID) {
		/* About to start a heap scan for marked objects. */
		/* Mark stack is empty.  OK to reallocate.	  */
		if (GC_mark_stack_too_small) {
    	            alloc_mark_stack(2*GC_mark_stack_size);
		}
		GC_mark_state = MS_PARTIALLY_INVALID;
    	    }
    	    scan_ptr = GC_push_next_marked(scan_ptr);
    	    if (scan_ptr == 0 && GC_mark_state == MS_PARTIALLY_INVALID) {
    	    	GC_push_roots(TRUE, cold_gc_frame);
    	    	GC_objects_are_marked = TRUE;
    	    	if (GC_mark_state != MS_INVALID) {
    	    	    GC_mark_state = MS_ROOTS_PUSHED;
    	    	}
    	    }
    	    return(FALSE);
    	default:
    	    ABORT("GC_mark_some: bad state");
    	    return(FALSE);
    }
}
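
/* Illustrative sketch, not part of this file: the stop-the-world phase	*/
/* (GC_stopped_mark in alloc.c) drives the state machine above with a	*/
/* loop of roughly this shape, where "frame" marks a stack frame that	*/
/* stays valid until marking completes.					*/
#if 0
static void example_drive_marker(void)
{
    word frame;

    while (!GC_mark_some((ptr_t)(&frame))) {
	/* Each call does roughly a page worth of marking work. */
    }
}
#endif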


#if defined(MSWIN32) && defined(__GNUC__)

    typedef struct {
      EXCEPTION_REGISTRATION ex_reg;
      void *alt_path;
    } ext_ex_regn;


    static EXCEPTION_DISPOSITION mark_ex_handler(
        struct _EXCEPTION_RECORD *ex_rec,
        void *est_frame,
        struct _CONTEXT *context,
        void *disp_ctxt)
    {
        if (ex_rec->ExceptionCode == STATUS_ACCESS_VIOLATION) {
          ext_ex_regn *xer = (ext_ex_regn *)est_frame;

          /* Unwind from the inner function assuming the standard */
          /* function prologue.                                   */
          /* Assumes code has not been compiled with              */
          /* -fomit-frame-pointer.                                */
          context->Esp = context->Ebp;
          context->Ebp = *((DWORD *)context->Esp);
          context->Esp = context->Esp - 8;

          /* Resume execution at the "real" handler within the    */
          /* wrapper function.                                    */
          context->Eip = (DWORD )(xer->alt_path);

          return ExceptionContinueExecution;

        } else {
            return ExceptionContinueSearch;
        }
    }
# endif /* __GNUC__  && MSWIN32 */

#ifdef GC_WIN32_THREADS
  extern GC_bool GC_started_thread_while_stopped(void);
  /* In win32_threads.c.  Did we invalidate mark phase with an	*/
  /* unexpected thread start?					*/
#endif

# ifdef WRAP_MARK_SOME
  GC_bool GC_mark_some(ptr_t cold_gc_frame)
  {
      GC_bool ret_val;

#   ifdef MSWIN32
#    ifndef __GNUC__
      /* Windows 98 appears to asynchronously create and remove  */
      /* writable memory mappings, for reasons we haven't yet    */
      /* understood.  Since we look for writable regions to      */
      /* determine the root set, we may try to mark from an      */
      /* address range that disappeared since we started the     */
      /* collection.  Thus we have to recover from faults here.  */
      /* This code does not appear to be necessary for Windows   */
      /* 95/NT/2000. Note that this code should never generate   */
      /* an incremental GC write fault.                          */
      /* It's conceivable that this is the same issue with	 */
      /* terminating threads that we see with Linux and		 */
      /* USE_PROC_FOR_LIBRARIES.				 */

      __try {
          ret_val = GC_mark_some_inner(cold_gc_frame);
      } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
                EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
	  goto handle_ex;
      }
#     ifdef GC_WIN32_THREADS
	/* With DllMain-based thread tracking, a thread may have	*/
	/* started while we were marking.  This is logically equivalent	*/
	/* to the exception case; our results are invalid and we have	*/
	/* to start over.  This cannot be prevented since we can't	*/
	/* block in DllMain.						*/
	if (GC_started_thread_while_stopped()) goto handle_ex;
#     endif
     rm_handler:
      return ret_val;

#    else /* __GNUC__ */

      /* Manually install an exception handler since GCC does    */
      /* not yet support Structured Exception Handling (SEH) on  */
      /* Win32.                                                  */

      ext_ex_regn er;

      er.alt_path = &&handle_ex;
      er.ex_reg.handler = mark_ex_handler;
      asm volatile ("movl %%fs:0, %0" : "=r" (er.ex_reg.prev));
      asm volatile ("movl %0, %%fs:0" : : "r" (&er));
      ret_val = GC_mark_some_inner(cold_gc_frame);
      /* Prevent GCC from considering the following code unreachable */
      /* and thus eliminating it.                                    */
        if (er.alt_path == 0)
          goto handle_ex;
    rm_handler:
      /* Uninstall the exception handler */
      asm volatile ("mov %0, %%fs:0" : : "r" (er.ex_reg.prev));
      return ret_val;

#    endif /* __GNUC__ */
#   else /* !MSWIN32 */
      /* Here we are handling the case in which /proc is used for root	*/
      /* finding, and we have threads.  We may find a stack for a	*/
      /* thread that is in the process of exiting, and disappears	*/
      /* while we are marking it.  This seems extremely difficult to	*/
      /* avoid otherwise.						*/
      if (GC_incremental)
	      WARN("Incremental GC incompatible with /proc roots\n", 0);
      	/* I'm not sure if this could still work ...	*/
      GC_setup_temporary_fault_handler();
      if(SETJMP(GC_jmp_buf) != 0) goto handle_ex;
      ret_val = GC_mark_some_inner(cold_gc_frame);
    rm_handler:
      GC_reset_fault_handler();
      return ret_val;

#   endif /* !MSWIN32 */

handle_ex:
    /* Exception handler starts here for all cases. */
      if (GC_print_stats) {
        GC_log_printf("Caught ACCESS_VIOLATION in marker. "
		      "Memory mapping disappeared.\n");
      }

      /* We have bad roots on the stack.  Discard mark stack.	*/
      /* Rescan from marked objects.  Redetermine roots.	*/
      GC_invalidate_mark_state();
      scan_ptr = 0;

      ret_val = FALSE;
      goto rm_handler;  /* Back to platform-specific code. */
  }
#endif /* WRAP_MARK_SOME */


GC_bool GC_mark_stack_empty(void)
{
    return(GC_mark_stack_top < GC_mark_stack);
}

void GC_invalidate_mark_state(void)
{
    GC_mark_state = MS_INVALID;
    GC_mark_stack_top = GC_mark_stack-1;
}

mse * GC_signal_mark_stack_overflow(mse *msp)
{
    GC_mark_state = MS_INVALID;
    GC_mark_stack_too_small = TRUE;
    if (GC_print_stats) {
	GC_log_printf("Mark stack overflow; current size = %lu entries\n",
	    	      GC_mark_stack_size);
    }
    return(msp - GC_MARK_STACK_DISCARDS);
}
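
/* Illustrative sketch, not collector code: routines that push entries	*/
/* directly (as the PUSH_* macros in gc_pmark.h do) typically handle	*/
/* overflow by calling the routine above and continuing with the	*/
/* returned, truncated stack pointer.  The parameter names below are	*/
/* hypothetical.							*/
#if 0
static mse * example_push_entry(mse * mark_stack_top, ptr_t obj_start,
				word obj_sz)
{
    mark_stack_top++;
    if (mark_stack_top >= GC_mark_stack_limit) {
	mark_stack_top = GC_signal_mark_stack_overflow(mark_stack_top);
    }
    mark_stack_top -> mse_start = obj_start;
    mark_stack_top -> mse_descr = obj_sz | GC_DS_LENGTH;
    return mark_stack_top;
}
#endif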

/*
 * Mark objects pointed to by the regions described by
 * mark stack entries between mark_stack and mark_stack_top,
 * inclusive.  Assumes the upper limit of a mark stack entry
 * is never 0.  A mark stack entry never has size 0.
 * We try to traverse on the order of a hblk of memory before we return.
 * Caller is responsible for calling this until the mark stack is empty.
 * Note that this is the most performance critical routine in the
 * collector.  Hence it contains all sorts of ugly hacks to speed
 * things up.  In particular, we avoid procedure calls on the common
 * path, we take advantage of peculiarities of the mark descriptor
 * encoding, we optionally maintain a cache for the block address to
 * header mapping, we prefetch when an object is "grayed", etc.
 */
mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack, mse *mark_stack_limit)
{
  signed_word credit = HBLKSIZE;  /* Remaining credit for marking work	*/
  ptr_t current_p;	/* Pointer to current candidate ptr.	*/
  word current;	/* Candidate pointer.			*/
  ptr_t limit;	/* (Incl) limit of current candidate 	*/
  				/* range				*/
  word descr;
  ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
  ptr_t least_ha = GC_least_plausible_heap_addr;
  DECLARE_HDR_CACHE;

# define SPLIT_RANGE_WORDS 128  /* Must be power of 2.		*/

  GC_objects_are_marked = TRUE;
  INIT_HDR_CACHE;
# ifdef OS2 /* Use untweaked version to circumvent compiler problem */
  while (mark_stack_top >= mark_stack && credit >= 0) {
# else
  while ((((ptr_t)mark_stack_top - (ptr_t)mark_stack) | credit)
  	>= 0) {
# endif
    current_p = mark_stack_top -> mse_start;
    descr = mark_stack_top -> mse_descr;
  retry:
    /* current_p and descr describe the current object.		*/
    /* *mark_stack_top is vacant.				*/
    /* The following is 0 only for small objects described by a simple	*/
    /* length descriptor.  For many applications this is the common	*/
    /* case, so we try to detect it quickly.				*/
    if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | GC_DS_TAGS)) {
      word tag = descr & GC_DS_TAGS;

      switch(tag) {
        case GC_DS_LENGTH:
          /* Large length.					        */
          /* Process part of the range to avoid pushing too much on the	*/
          /* stack.							*/
	  GC_ASSERT(descr < (word)GC_greatest_plausible_heap_addr
			    - (word)GC_least_plausible_heap_addr);
#	  ifdef ENABLE_TRACE
	    if (GC_trace_addr >= current_p
		&& GC_trace_addr < current_p + descr) {
		GC_log_printf("GC:%d Large section; start %p len %lu\n",
			      GC_gc_no, current_p, (unsigned long) descr);
	    }
#	  endif /* ENABLE_TRACE */
#	  ifdef PARALLEL_MARK
#	    define SHARE_BYTES 2048
	    if (descr > SHARE_BYTES && GC_parallel
		&& mark_stack_top < mark_stack_limit - 1) {
	      int new_size = (descr/2) & ~(sizeof(word)-1);
	      mark_stack_top -> mse_start = current_p;
	      mark_stack_top -> mse_descr = new_size + sizeof(word);
					/* makes sure we handle 	*/
					/* misaligned pointers.		*/
	      mark_stack_top++;
#	      ifdef ENABLE_TRACE
	        if (GC_trace_addr >= current_p
		    && GC_trace_addr < current_p + descr) {
		    GC_log_printf("GC:%d splitting (parallel) %p at %p\n",
				  GC_gc_no, current_p, current_p + new_size);
	        }
#	      endif /* ENABLE_TRACE */
	      current_p += new_size;
	      descr -= new_size;
	      goto retry;
	    }
#	  endif /* PARALLEL_MARK */
          mark_stack_top -> mse_start =
         	limit = current_p + WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
          mark_stack_top -> mse_descr =
          		descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
#	  ifdef ENABLE_TRACE
	    if (GC_trace_addr >= current_p
		&& GC_trace_addr < current_p + descr) {
		GC_log_printf("GC:%d splitting %p at %p\n",
			      GC_gc_no, current_p, limit);
	    }
#	  endif /* ENABLE_TRACE */
          /* Make sure that pointers overlapping the two ranges are	*/
          /* considered. 						*/
          limit += sizeof(word) - ALIGNMENT;
          break;
        case GC_DS_BITMAP:
          mark_stack_top--;
#	  ifdef ENABLE_TRACE
	    if (GC_trace_addr >= current_p
		&& GC_trace_addr < current_p + WORDS_TO_BYTES(WORDSZ-2)) {
		GC_log_printf("GC:%d Tracing from %p bitmap descr %lu\n",
			      GC_gc_no, current_p, (unsigned long) descr);
	    }
#	  endif /* ENABLE_TRACE */
          descr &= ~GC_DS_TAGS;
          credit -= WORDS_TO_BYTES(WORDSZ/2); /* guess */
          while (descr != 0) {
            if ((signed_word)descr < 0) {
              current = *(word *)current_p;
	      FIXUP_POINTER(current);
	      if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
		PREFETCH((ptr_t)current);
#               ifdef ENABLE_TRACE
	          if (GC_trace_addr == current_p) {
	            GC_log_printf("GC:%d Considering(3) %p -> %p\n",
			          GC_gc_no, current_p, (ptr_t) current);
	          }
#               endif /* ENABLE_TRACE */
                PUSH_CONTENTS((ptr_t)current, mark_stack_top,
			      mark_stack_limit, current_p, exit1);
	      }
            }
	    descr <<= 1;
	    current_p += sizeof(word);
          }
          continue;
        case GC_DS_PROC:
          mark_stack_top--;
#	  ifdef ENABLE_TRACE
	    if (GC_trace_addr >= current_p
		&& GC_base(current_p) != 0
		&& GC_base(current_p) == GC_base(GC_trace_addr)) {
		GC_log_printf("GC:%d Tracing from %p proc descr %lu\n",
			      GC_gc_no, current_p, (unsigned long) descr);
	    }
#	  endif /* ENABLE_TRACE */
          credit -= GC_PROC_BYTES;
          mark_stack_top =
              (*PROC(descr))
              	    ((word *)current_p, mark_stack_top,
              	    mark_stack_limit, ENV(descr));
          continue;
        case GC_DS_PER_OBJECT:
	  if ((signed_word)descr >= 0) {
	    /* Descriptor is in the object.	*/
            descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
	  } else {
	    /* Descriptor is in type descriptor pointed to by first	*/
	    /* word in object.						*/
	    ptr_t type_descr = *(ptr_t *)current_p;
	    /* type_descr is either a valid pointer to the descriptor	*/
	    /* structure, or this object was on a free list.  If it	*/
	    /* was anything but the last object on the free list, we	*/
	    /* will misinterpret the next object on the free list as	*/
	    /* the type descriptor, and get a 0 GC descriptor, which	*/
	    /* is ideal.  Unfortunately, we need to check for the last	*/
	    /* object case explicitly.					*/
	    if (0 == type_descr) {
		/* Rarely executed.	*/
		mark_stack_top--;
		continue;
	    }
            descr = *(word *)(type_descr
			      - (descr - (GC_DS_PER_OBJECT
					  - GC_INDIR_PER_OBJ_BIAS)));
	  }
	  if (0 == descr) {
	      /* Can happen either because we generated a 0 descriptor	*/
	      /* or we saw a pointer to a free object.		*/
	      mark_stack_top--;
	      continue;
	  }
          goto retry;
      }
    } else /* Small object with length descriptor */ {
      mark_stack_top--;
      limit = current_p + (word)descr;
    }
#   ifdef ENABLE_TRACE
	if (GC_trace_addr >= current_p
	    && GC_trace_addr < limit) {
	    GC_log_printf("GC:%d Tracing from %p len %lu\n",
			  GC_gc_no, current_p, (unsigned long) descr);
	}
#   endif /* ENABLE_TRACE */
    /* The simple case in which we're scanning a range.	*/
    GC_ASSERT(!((word)current_p & (ALIGNMENT-1)));
    credit -= limit - current_p;
    limit -= sizeof(word);
    {
#     define PREF_DIST 4

#     ifndef SMALL_CONFIG
        word deferred;

	/* Try to prefetch the next pointer to be examined asap.	*/
	/* Empirically, this also seems to help slightly without	*/
	/* prefetches, at least on linux/X86.  Presumably this loop 	*/
	/* ends up with less register pressure, and gcc thus ends up 	*/
	/* generating slightly better code.  Overall gcc code quality	*/
	/* for this loop is still not great.				*/
	for(;;) {
	  PREFETCH(limit - PREF_DIST*CACHE_LINE_SIZE);
	  GC_ASSERT(limit >= current_p);
	  deferred = *(word *)limit;
	  FIXUP_POINTER(deferred);
	  limit -= ALIGNMENT;
	  if ((ptr_t)deferred >= least_ha && (ptr_t)deferred <  greatest_ha) {
	    PREFETCH((ptr_t)deferred);
	    break;
	  }
	  if (current_p > limit) goto next_object;
	  /* Unroll once, so we don't do too many of the prefetches 	*/
	  /* based on limit.						*/
	  deferred = *(word *)limit;
	  FIXUP_POINTER(deferred);
	  limit -= ALIGNMENT;
	  if ((ptr_t)deferred >= least_ha && (ptr_t)deferred <  greatest_ha) {
	    PREFETCH((ptr_t)deferred);
	    break;
	  }
	  if (current_p > limit) goto next_object;
	}
#     endif

      while (current_p <= limit) {
	/* Empirically, unrolling this loop doesn't help a lot.	*/
	/* Since PUSH_CONTENTS expands to a lot of code,	*/
	/* we don't.						*/
        current = *(word *)current_p;
	FIXUP_POINTER(current);
        PREFETCH(current_p + PREF_DIST*CACHE_LINE_SIZE);
        if ((ptr_t)current >= least_ha && (ptr_t)current <  greatest_ha) {
  	  /* Prefetch the contents of the object we just pushed.  It's	*/
  	  /* likely we will need them soon.				*/
  	  PREFETCH((ptr_t)current);
#         ifdef ENABLE_TRACE
	    if (GC_trace_addr == current_p) {
	        GC_log_printf("GC:%d Considering(1) %p -> %p\n",
			      GC_gc_no, current_p, (ptr_t) current);
	    }
#         endif /* ENABLE_TRACE */
          PUSH_CONTENTS((ptr_t)current, mark_stack_top,
  		           mark_stack_limit, current_p, exit2);
        }
        current_p += ALIGNMENT;
      }

#     ifndef SMALL_CONFIG
	/* We still need to mark the entry we previously prefetched.	*/
	/* We already know that it passes the preliminary pointer	*/
	/* validity test.						*/
#       ifdef ENABLE_TRACE
	    if (GC_trace_addr == current_p) {
	        GC_log_printf("GC:%d Considering(2) %p -> %p\n",
			      GC_gc_no, current_p, (ptr_t) deferred);
	    }
#       endif /* ENABLE_TRACE */
        PUSH_CONTENTS((ptr_t)deferred, mark_stack_top,
  		         mark_stack_limit, current_p, exit4);
	next_object:;
#     endif
    }
  }
  return mark_stack_top;
}
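
/* Illustrative sketch, not part of this file: callers drain the global	*/
/* mark stack with this routine roughly as follows (this is what the	*/
/* MARK_FROM_MARK_STACK() macro used in GC_mark_some expands to, driven	*/
/* until the stack is empty).						*/
#if 0
static void example_drain_mark_stack(void)
{
    while (!GC_mark_stack_empty()) {
	GC_mark_stack_top = GC_mark_from(GC_mark_stack_top, GC_mark_stack,
					 GC_mark_stack_limit);
    }
}
#endif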

#ifdef PARALLEL_MARK

/* We assume we have an ANSI C Compiler.	*/
GC_bool GC_help_wanted = FALSE;
unsigned GC_helper_count = 0;
unsigned GC_active_count = 0;
word GC_mark_no = 0;

#define LOCAL_MARK_STACK_SIZE HBLKSIZE
	/* Under normal circumstances, this is big enough to guarantee	*/
	/* We don't overflow half of it in a single call to 		*/
	/* GC_mark_from.						*/


/* Steal mark stack entries starting at mse low into mark stack local	*/
/* until we either steal mse high, or we have max entries.		*/
/* Return a pointer to the top of the local mark stack.		        */
/* *next is replaced by a pointer to the next unscanned mark stack	*/
/* entry.								*/
mse * GC_steal_mark_stack(mse * low, mse * high, mse * local,
			  unsigned max, mse **next)
{
    mse *p;
    mse *top = local - 1;
    unsigned i = 0;

    GC_ASSERT(high >= low-1 && high - low + 1 <= GC_mark_stack_size);
    for (p = low; p <= high && i <= max; ++p) {
	word descr = AO_load((volatile AO_t *) &(p -> mse_descr));
	if (descr != 0) {
	    /* Must be ordered after read of descr: */
	    AO_store_release_write((volatile AO_t *) &(p -> mse_descr), 0);
	    /* More than one thread may get this entry, but that's only */
	    /* a minor performance problem.				*/
	    ++top;
	    top -> mse_descr = descr;
	    top -> mse_start = p -> mse_start;
	    GC_ASSERT((top -> mse_descr & GC_DS_TAGS) != GC_DS_LENGTH ||
		      top -> mse_descr < (ptr_t)GC_greatest_plausible_heap_addr
			                 - (ptr_t)GC_least_plausible_heap_addr);
	    /* If this is a big object, count it as			*/
	    /* size/256 + 1 objects.					*/
	    ++i;
	    if ((descr & GC_DS_TAGS) == GC_DS_LENGTH) i += (descr >> 8);
	}
    }
    *next = p;
    return top;
}

/* Copy back a local mark stack.	*/
/* low and high are inclusive bounds.	*/
void GC_return_mark_stack(mse * low, mse * high)
{
    mse * my_top;
    mse * my_start;
    size_t stack_size;

    if (high < low) return;
    stack_size = high - low + 1;
    GC_acquire_mark_lock();
    my_top = GC_mark_stack_top;	/* Concurrent modification impossible. */
    my_start = my_top + 1;
    if (my_start - GC_mark_stack + stack_size > GC_mark_stack_size) {
      if (GC_print_stats) {
	  GC_log_printf("No room to copy back mark stack.");
      }
      GC_mark_state = MS_INVALID;
      GC_mark_stack_too_small = TRUE;
      /* We drop the local mark stack.  We'll fix things later.	*/
    } else {
      BCOPY(low, my_start, stack_size * sizeof(mse));
      GC_ASSERT((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
		== my_top);
      AO_store_release_write((volatile AO_t *)(&GC_mark_stack_top),
		             (AO_t)(my_top + stack_size));
      		/* Ensures visibility of previously written stack contents. */
    }
    GC_release_mark_lock();
    GC_notify_all_marker();
}

/* Mark from the local mark stack.		*/
/* On return, the local mark stack is empty.	*/
/* But this may be achieved by copying the	*/
/* local mark stack back into the global one.	*/
void GC_do_local_mark(mse *local_mark_stack, mse *local_top)
{
    unsigned n;
#   define N_LOCAL_ITERS 1

#   ifdef GC_ASSERTIONS
      /* Make sure we don't hold mark lock. */
	GC_acquire_mark_lock();
	GC_release_mark_lock();
#   endif
    for (;;) {
        for (n = 0; n < N_LOCAL_ITERS; ++n) {
	    local_top = GC_mark_from(local_top, local_mark_stack,
				     local_mark_stack + LOCAL_MARK_STACK_SIZE);
	    if (local_top < local_mark_stack) return;
	    if (local_top - local_mark_stack >= LOCAL_MARK_STACK_SIZE/2) {
	 	GC_return_mark_stack(local_mark_stack, local_top);
		return;
	    }
	}
	if ((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
	    < (mse *)AO_load(&GC_first_nonempty)
	    && GC_active_count < GC_helper_count
	    && local_top > local_mark_stack + 1) {
	    /* Try to share the load, since the main stack is empty,	*/
	    /* and helper threads are waiting for a refill.		*/
	    /* The entries near the bottom of the stack are likely	*/
	    /* to require more work.  Thus we return those, even though	*/
	    /* it's harder.						*/
 	    mse * new_bottom = local_mark_stack
				+ (local_top - local_mark_stack)/2;
	    GC_ASSERT(new_bottom > local_mark_stack
		      && new_bottom < local_top);
	    GC_return_mark_stack(local_mark_stack, new_bottom - 1);
	    memmove(local_mark_stack, new_bottom,
		    (local_top - new_bottom + 1) * sizeof(mse));
	    local_top -= (new_bottom - local_mark_stack);
	}
    }
}

#define ENTRIES_TO_GET 5

long GC_markers = 2;		/* Normally changed by thread-library-	*/
				/* -specific code.			*/

/* Mark using the local mark stack until the global mark stack is empty	*/
/* and there are no active workers. Update GC_first_nonempty to reflect	*/
/* progress.								*/
/* Caller does not hold mark lock.					*/
/* Caller has already incremented GC_helper_count.  We decrement it,	*/
/* and maintain GC_active_count.					*/
void GC_mark_local(mse *local_mark_stack, int id)
{
    mse * my_first_nonempty;

    GC_acquire_mark_lock();
    GC_active_count++;
    my_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
    GC_ASSERT((mse *)AO_load(&GC_first_nonempty) >= GC_mark_stack &&
	      (mse *)AO_load(&GC_first_nonempty) <=
	      (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top)) + 1);
    if (GC_print_stats == VERBOSE)
	GC_log_printf("Starting mark helper %lu\n", (unsigned long)id);
    GC_release_mark_lock();
    for (;;) {
  	size_t n_on_stack;
        size_t n_to_get;
	mse * my_top;
	mse * local_top;
        mse * global_first_nonempty = (mse *)AO_load(&GC_first_nonempty);

    	GC_ASSERT(my_first_nonempty >= GC_mark_stack &&
		  my_first_nonempty <=
		  (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))  + 1);
    	GC_ASSERT(global_first_nonempty >= GC_mark_stack &&
		  global_first_nonempty <=
		  (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))  + 1);
	if (my_first_nonempty < global_first_nonempty) {
	    my_first_nonempty = global_first_nonempty;
        } else if (global_first_nonempty < my_first_nonempty) {
	    AO_compare_and_swap(&GC_first_nonempty,
				(AO_t) global_first_nonempty,
				(AO_t) my_first_nonempty);
	    /* If this fails, we just go ahead, without updating	*/
	    /* GC_first_nonempty.					*/
	}
	/* Perhaps we should also update GC_first_nonempty, if it */
	/* is less.  But that would require using atomic updates. */
	my_top = (mse *)AO_load_acquire((volatile AO_t *)(&GC_mark_stack_top));
	n_on_stack = my_top - my_first_nonempty + 1;
        if (0 == n_on_stack) {
	    GC_acquire_mark_lock();
            my_top = GC_mark_stack_top;
	    	/* Asynchronous modification impossible here, 	*/
	        /* since we hold mark lock. 			*/
            n_on_stack = my_top - my_first_nonempty + 1;
	    if (0 == n_on_stack) {
		GC_active_count--;
		GC_ASSERT(GC_active_count <= GC_helper_count);
		/* Other markers may redeposit objects	*/
		/* on the stack.				*/
		if (0 == GC_active_count) GC_notify_all_marker();
		while (GC_active_count > 0
		       && (mse *)AO_load(&GC_first_nonempty)
		          > GC_mark_stack_top) {
		    /* We will be notified if either GC_active_count	*/
		    /* reaches zero, or if more objects are pushed on	*/
		    /* the global mark stack.				*/
		    GC_wait_marker();
		}
		if (GC_active_count == 0 &&
		    (mse *)AO_load(&GC_first_nonempty) > GC_mark_stack_top) {
		    GC_bool need_to_notify = FALSE;
		    /* The above conditions can't be falsified while we	*/
		    /* hold the mark lock, since neither 		*/
		    /* GC_active_count nor GC_mark_stack_top can	*/
		    /* change.  GC_first_nonempty can only be		*/
		    /* incremented asynchronously.  Thus we know that	*/
		    /* both conditions actually held simultaneously.	*/
		    GC_helper_count--;
		    if (0 == GC_helper_count) need_to_notify = TRUE;
		    if (GC_print_stats == VERBOSE)
		      GC_log_printf(
		        "Finished mark helper %lu\n", (unsigned long)id);
		    GC_release_mark_lock();
		    if (need_to_notify) GC_notify_all_marker();
		    return;
		}
		/* else there's something on the stack again, or	*/
		/* another helper may push something.			*/
		GC_active_count++;
	        GC_ASSERT(GC_active_count > 0);
		GC_release_mark_lock();
		continue;
	    } else {
		GC_release_mark_lock();
	    }
	}
	n_to_get = ENTRIES_TO_GET;
	if (n_on_stack < 2 * ENTRIES_TO_GET) n_to_get = 1;
	local_top = GC_steal_mark_stack(my_first_nonempty, my_top,
					local_mark_stack, n_to_get,
				        &my_first_nonempty);
        GC_ASSERT(my_first_nonempty >= GC_mark_stack &&
	          my_first_nonempty <=
		    (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top)) + 1);
	GC_do_local_mark(local_mark_stack, local_top);
    }
}

/* Perform Parallel mark.			*/
/* We hold the GC lock, not the mark lock.	*/
/* Currently runs until the mark stack is	*/
/* empty.					*/
void GC_do_parallel_mark()
{
    mse local_mark_stack[LOCAL_MARK_STACK_SIZE];

    GC_acquire_mark_lock();
    GC_ASSERT(I_HOLD_LOCK());
    /* This could be a GC_ASSERT, but it seems safer to keep it on	*/
    /* all the time, especially since it's cheap.			*/
    if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
	ABORT("Tried to start parallel mark in bad state");
    if (GC_print_stats == VERBOSE)
	GC_log_printf("Starting marking for mark phase number %lu\n",
		   (unsigned long)GC_mark_no);
    GC_first_nonempty = (AO_t)GC_mark_stack;
    GC_active_count = 0;
    GC_helper_count = 1;
    GC_help_wanted = TRUE;
    GC_release_mark_lock();
    GC_notify_all_marker();
	/* Wake up potential helpers.	*/
    GC_mark_local(local_mark_stack, 0);
    GC_acquire_mark_lock();
    GC_help_wanted = FALSE;
    /* Done; clean up.	*/
    while (GC_helper_count > 0) GC_wait_marker();
    /* GC_helper_count cannot be incremented while GC_help_wanted == FALSE */
    if (GC_print_stats == VERBOSE)
	GC_log_printf(
	    "Finished marking for mark phase number %lu\n",
	    (unsigned long)GC_mark_no);
    GC_mark_no++;
    GC_release_mark_lock();
    GC_notify_all_marker();
}


/* Try to help out the marker, if it's running.	        */
/* We do not hold the GC lock, but the requestor does.	*/
void GC_help_marker(word my_mark_no)
{
    mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
    unsigned my_id;

    if (!GC_parallel) return;
    GC_acquire_mark_lock();
    while (GC_mark_no < my_mark_no
           || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
      GC_wait_marker();
    }
    my_id = GC_helper_count;
    if (GC_mark_no != my_mark_no || my_id >= GC_markers) {
      /* Second test is useful only if original threads can also	*/
      /* act as helpers.  Under Linux they can't.			*/
      GC_release_mark_lock();
      return;
    }
    GC_helper_count = my_id + 1;
    GC_release_mark_lock();
    GC_mark_local(local_mark_stack, my_id);
    /* GC_mark_local decrements GC_helper_count. */
}

#endif /* PARALLEL_MARK */

/* Allocate or reallocate space for mark stack of size n entries.  */
/* May silently fail.						   */
static void alloc_mark_stack(size_t n)
{
    mse * new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
#   ifdef GWW_VDB
      /* Don't recycle a stack segment obtained with the wrong flags. 	*/
      /* Win32 GetWriteWatch requires the right kind of memory.		*/
      static GC_bool GC_incremental_at_stack_alloc = 0;
      GC_bool recycle_old = (!GC_incremental || GC_incremental_at_stack_alloc);

      GC_incremental_at_stack_alloc = GC_incremental;
#   else
#     define recycle_old 1
#   endif

    GC_mark_stack_too_small = FALSE;
    if (GC_mark_stack_size != 0) {
        if (new_stack != 0) {
	  if (recycle_old) {
            /* Recycle old space */
              size_t page_offset = (word)GC_mark_stack & (GC_page_size - 1);
              size_t size = GC_mark_stack_size * sizeof(struct GC_ms_entry);
	      size_t displ = 0;

	      if (0 != page_offset) displ = GC_page_size - page_offset;
	      size = (size - displ) & ~(GC_page_size - 1);
	      if (size > 0) {
	        GC_add_to_heap((struct hblk *)
	      			((word)GC_mark_stack + displ), (word)size);
	      }
	  }
          GC_mark_stack = new_stack;
          GC_mark_stack_size = n;
	  GC_mark_stack_limit = new_stack + n;
	  if (GC_print_stats) {
	      GC_log_printf("Grew mark stack to %lu frames\n",
		    	    (unsigned long) GC_mark_stack_size);
	  }
        } else {
	  if (GC_print_stats) {
	      GC_log_printf("Failed to grow mark stack to %lu frames\n",
		    	    (unsigned long) n);
	  }
        }
    } else {
        if (new_stack == 0) {
            GC_err_printf("No space for mark stack\n");
            EXIT();
        }
        GC_mark_stack = new_stack;
        GC_mark_stack_size = n;
	GC_mark_stack_limit = new_stack + n;
    }
    GC_mark_stack_top = GC_mark_stack-1;
}

void GC_mark_init()
{
    alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
}

/*
 * Push all locations between b and t onto the mark stack.
 * b is the first location to be checked. t is one past the last
 * location to be checked.
 * Should only be used if there is no possibility of mark stack
 * overflow.
 */
void GC_push_all(ptr_t bottom, ptr_t top)
{
    register word length;

    bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));
    if (top == 0 || bottom == top) return;
    GC_mark_stack_top++;
    if (GC_mark_stack_top >= GC_mark_stack_limit) {
	ABORT("unexpected mark stack overflow");
    }
    length = top - bottom;
#   if GC_DS_TAGS > ALIGNMENT - 1
	length += GC_DS_TAGS;
	length &= ~GC_DS_TAGS;
#   endif
    GC_mark_stack_top -> mse_start = bottom;
    GC_mark_stack_top -> mse_descr = length;
}
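
/* Illustrative sketch, not collector code: this is how a root range,	*/
/* e.g. a registered static data segment, gets pushed wholesale.  The	*/
/* segment bounds below are hypothetical placeholders.			*/
#if 0
static void example_push_root_segment(ptr_t seg_start, ptr_t seg_end)
{
    /* Pushes the whole [seg_start, seg_end) range as one mark stack	*/
    /* entry; the entry is later scanned for pointers by GC_mark_from.	*/
    GC_push_all(seg_start, seg_end);
}
#endif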

/*
 * Analogous to the above, but push only those pages h with dirty_fn(h) != 0.
 * We use push_fn to actually push the block.
 * Used both to selectively push dirty pages, or to push a block
 * in piecemeal fashion, to allow for more marking concurrency.
 * Will not overflow mark stack if push_fn pushes a small fixed number
 * of entries.  (This is invoked only if push_fn pushes a single entry,
 * or if it marks each object before pushing it, thus ensuring progress
 * in the event of a stack overflow.)
 */
void GC_push_selected(ptr_t bottom, ptr_t top,
		      int (*dirty_fn) (struct hblk *),
		      void (*push_fn) (ptr_t, ptr_t))
{
    struct hblk * h;

    bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));

    if (top == 0 || bottom == top) return;
    h = HBLKPTR(bottom + HBLKSIZE);
    if (top <= (ptr_t) h) {
  	if ((*dirty_fn)(h-1)) {
	    (*push_fn)(bottom, top);
	}
	return;
    }
    if ((*dirty_fn)(h-1)) {
        (*push_fn)(bottom, (ptr_t)h);
    }
    while ((ptr_t)(h+1) <= top) {
	if ((*dirty_fn)(h)) {
	    if ((word)(GC_mark_stack_top - GC_mark_stack)
		> 3 * GC_mark_stack_size / 4) {
	 	/* Danger of mark stack overflow */
		(*push_fn)((ptr_t)h, top);
		return;
	    } else {
		(*push_fn)((ptr_t)h, (ptr_t)(h+1));
	    }
	}
	h++;
    }
    if ((ptr_t)h != top) {
	if ((*dirty_fn)(h)) {
            (*push_fn)((ptr_t)h, top);
        }
    }
    if (GC_mark_stack_top >= GC_mark_stack_limit) {
        ABORT("unexpected mark stack overflow");
    }
}

# ifndef SMALL_CONFIG

#ifdef PARALLEL_MARK
    /* Break up root sections into page size chunks to better spread 	*/
    /* out work.							*/
    GC_bool GC_true_func(struct hblk *h) { return TRUE; }
#   define GC_PUSH_ALL(b,t) GC_push_selected(b,t,GC_true_func,GC_push_all);
#else
#   define GC_PUSH_ALL(b,t) GC_push_all(b,t);
#endif


void GC_push_conditional(ptr_t bottom, ptr_t top, GC_bool all)
{
    if (all) {
      if (GC_dirty_maintained) {
#	ifdef PROC_VDB
	    /* Pages that were never dirtied cannot contain pointers	*/
	    GC_push_selected(bottom, top, GC_page_was_ever_dirty, GC_push_all);
#	else
	    GC_push_all(bottom, top);
#	endif
      } else {
      	GC_push_all(bottom, top);
      }
    } else {
	GC_push_selected(bottom, top, GC_page_was_dirty, GC_push_all);
    }
}
#endif

# if defined(MSWIN32) || defined(MSWINCE)
  void __cdecl GC_push_one(word p)
# else
  void GC_push_one(word p)
# endif
{
    GC_PUSH_ONE_STACK((ptr_t)p, MARKED_FROM_REGISTER);
}

struct GC_ms_entry *GC_mark_and_push(void *obj,
				     mse *mark_stack_ptr,
				     mse *mark_stack_limit,
				     void **src)
{
    hdr * hhdr;

    PREFETCH(obj);
    GET_HDR(obj, hhdr);
    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr),FALSE)) {
      if (GC_all_interior_pointers) {
	hhdr = GC_find_header(GC_base(obj));
	if (hhdr == 0) {
          GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
	  return mark_stack_ptr;
	}
      } else {
        GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
	return mark_stack_ptr;
      }
    }
    if (EXPECT(HBLK_IS_FREE(hhdr),0)) {
	GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
	return mark_stack_ptr;
    }

    PUSH_CONTENTS_HDR(obj, mark_stack_ptr /* modified */, mark_stack_limit,
		      src, was_marked, hhdr, TRUE);
 was_marked:
    return mark_stack_ptr;
}
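
/* Illustrative sketch, not collector code: a client mark procedure	*/
/* (the kind registered through GC_new_proc and selected by a		*/
/* GC_DS_PROC descriptor) typically pushes each pointer field with	*/
/* GC_mark_and_push.  The "example_pair" layout is hypothetical; real	*/
/* clients usually go through the bounds-checking GC_MARK_AND_PUSH	*/
/* macro in gc_mark.h rather than calling this function directly.	*/
#if 0
struct example_pair { void *car; void *cdr; };

static mse * example_pair_mark_proc(word * addr, mse * mark_stack_ptr,
				    mse * mark_stack_limit, word env)
{
    struct example_pair *p = (struct example_pair *)addr;

    mark_stack_ptr = GC_mark_and_push(p -> car, mark_stack_ptr,
				      mark_stack_limit,
				      (void **)&(p -> car));
    mark_stack_ptr = GC_mark_and_push(p -> cdr, mark_stack_ptr,
				      mark_stack_limit,
				      (void **)&(p -> cdr));
    return mark_stack_ptr;
}
#endif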

/* Mark and push (i.e. gray) a single object p onto the main	*/
/* mark stack.  Consider p to be valid if it is an interior	*/
/* pointer.							*/
/* The object p has passed a preliminary pointer validity	*/
/* test, but we do not definitely know whether it is valid.	*/
/* Mark bits are NOT atomically updated.  Thus this must be the	*/
/* only thread setting them.					*/
# if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
    void GC_mark_and_push_stack(ptr_t p, ptr_t source)
# else
    void GC_mark_and_push_stack(ptr_t p)
#   define source 0
# endif
{
    hdr * hhdr;
    ptr_t r = p;

    PREFETCH(p);
    GET_HDR(p, hhdr);
    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr),FALSE)) {
        if (hhdr != 0) {
          r = GC_base(p);
	  hhdr = HDR(r);
	}
        if (hhdr == 0) {
            GC_ADD_TO_BLACK_LIST_STACK(p, source);
	    return;
	}
    }
    if (EXPECT(HBLK_IS_FREE(hhdr),0)) {
	GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
1409 	return;
1410     }
1411 #   if defined(MANUAL_VDB) && defined(THREADS)
1412       /* Pointer is on the stack.  We may have dirtied the object	*/
1413       /* it points to, but not yet have called GC_dirty();	*/
1414       GC_dirty(p);	/* Implicitly affects entire object.	*/
1415 #   endif
1416     PUSH_CONTENTS_HDR(r, GC_mark_stack_top, GC_mark_stack_limit,
1417 		      source, mark_and_push_exit, hhdr, FALSE);
1418   mark_and_push_exit: ;
1419     /* We silently ignore pointers to near the end of a block,	*/
1420     /* which is very mildly suboptimal.				*/
1421     /* FIXME: We should probably add a header word to address	*/
1422     /* this.							*/
1423 }
1424 
1425 # ifdef TRACE_BUF
1426 
1427 # define TRACE_ENTRIES 1000
1428 
1429 struct trace_entry {
1430     char * kind;
1431     word gc_no;
1432     word bytes_allocd;
1433     word arg1;
1434     word arg2;
1435 } GC_trace_buf[TRACE_ENTRIES];
1436 
1437 int GC_trace_buf_ptr = 0;
1438 
1439 void GC_add_trace_entry(char *kind, word arg1, word arg2)
1440 {
1441     GC_trace_buf[GC_trace_buf_ptr].kind = kind;
1442     GC_trace_buf[GC_trace_buf_ptr].gc_no = GC_gc_no;
1443     GC_trace_buf[GC_trace_buf_ptr].bytes_allocd = GC_bytes_allocd;
1444     GC_trace_buf[GC_trace_buf_ptr].arg1 = arg1 ^ 0x80000000;
1445     GC_trace_buf[GC_trace_buf_ptr].arg2 = arg2 ^ 0x80000000;
1446     GC_trace_buf_ptr++;
1447     if (GC_trace_buf_ptr >= TRACE_ENTRIES) GC_trace_buf_ptr = 0;
1448 }
1449 
1450 void GC_print_trace(word gc_no, GC_bool lock)
1451 {
1452     int i;
1453     struct trace_entry *p;
1454 
1455     if (lock) LOCK();
1456     for (i = GC_trace_buf_ptr-1; i != GC_trace_buf_ptr; i--) {
1457     	if (i < 0) i = TRACE_ENTRIES-1;
1458     	p = GC_trace_buf + i;
1459     	if (p -> gc_no < gc_no || p -> kind == 0) return;
1460     	printf("Trace:%s (gc:%d,bytes:%d) 0x%X, 0x%X\n",
1461     		p -> kind, p -> gc_no, p -> bytes_allocd,
1462     		(p -> arg1) ^ 0x80000000, (p -> arg2) ^ 0x80000000);
1463     }
1464     printf("Trace incomplete\n");
1465     if (lock) UNLOCK();
1466 }
1467 
1468 # endif /* TRACE_BUF */
1469 
1470 /*
1471  * A version of GC_push_all that treats all interior pointers as valid
1472  * and scans the entire region immediately, in case the contents
1473  * change.
1474  */
1475 void GC_push_all_eager(ptr_t bottom, ptr_t top)
1476 {
1477     word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
1478     word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
1479     register word *p;
1480     register ptr_t q;
1481     register word *lim;
1482     register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1483     register ptr_t least_ha = GC_least_plausible_heap_addr;
1484 #   define GC_greatest_plausible_heap_addr greatest_ha
1485 #   define GC_least_plausible_heap_addr least_ha
1486 
1487     if (top == 0) return;
1488     /* check all pointers in range and push if they appear	*/
1489     /* to be valid.						*/
1490       lim = t - 1 /* longword */;
1491       for (p = b; p <= lim; p = (word *)(((ptr_t)p) + ALIGNMENT)) {
1492 	q = (ptr_t)(*p);
1493 	GC_PUSH_ONE_STACK((ptr_t)q, p);
1494       }
1495 #   undef GC_greatest_plausible_heap_addr
1496 #   undef GC_least_plausible_heap_addr
1497 }
1498 
1499 #ifndef THREADS
1500 /*
1501  * A version of GC_push_all that treats all interior pointers as valid
1502  * and scans part of the area immediately, to make sure that saved
1503  * register values are not lost.
1504  * Cold_gc_frame delimits the stack section that must be scanned
1505  * eagerly.  A zero value indicates that no eager scanning is needed.
1506  * We don't need to worry about the MANUAL_VDB case here, since this
1507  * is only called in the single-threaded case.  We assume that we
1508  * cannot collect between an assignment and the corresponding
1509  * GC_dirty() call.
1510  */
1511 void GC_push_all_stack_partially_eager(ptr_t bottom, ptr_t top,
1512 				       ptr_t cold_gc_frame)
1513 {
1514   if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
1515     /* Push the hot end of the stack eagerly, so that register values   */
1516     /* saved inside GC frames are marked before they disappear.		*/
1517     /* The rest of the marking can be deferred until later.		*/
1518     if (0 == cold_gc_frame) {
1519 	GC_push_all_stack(bottom, top);
1520 	return;
1521     }
1522     GC_ASSERT(bottom <= cold_gc_frame && cold_gc_frame <= top);
1523 #   ifdef STACK_GROWS_DOWN
1524 	GC_push_all(cold_gc_frame - sizeof(ptr_t), top);
1525 	GC_push_all_eager(bottom, cold_gc_frame);
1526 #   else /* STACK_GROWS_UP */
1527 	GC_push_all(bottom, cold_gc_frame + sizeof(ptr_t));
1528 	GC_push_all_eager(cold_gc_frame, top);
1529 #   endif /* STACK_GROWS_UP */
1530   } else {
1531     GC_push_all_eager(bottom, top);
1532   }
1533 # ifdef TRACE_BUF
1534       GC_add_trace_entry("GC_push_all_stack", bottom, top);
1535 # endif
1536 }
1537 #endif /* !THREADS */
1538 
1539 void GC_push_all_stack(ptr_t bottom, ptr_t top)
1540 {
1541 # if defined(THREADS) && defined(MPROTECT_VDB)
1542     GC_push_all_eager(bottom, top);
1543 # else
1544     if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
1545       GC_push_all(bottom, top);
1546     } else {
1547       GC_push_all_eager(bottom, top);
1548     }
1549 # endif
1550 }
1551 
1552 #if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
1553     defined(MARK_BIT_PER_GRANULE)
1554 # if GC_GRANULE_WORDS == 1
1555 #   define USE_PUSH_MARKED_ACCELERATORS
1556 #   define PUSH_GRANULE(q) \
1557 		{ ptr_t qcontents = (ptr_t)((q)[0]); \
1558 	          GC_PUSH_ONE_HEAP(qcontents, (q)); }
1559 # elif GC_GRANULE_WORDS == 2
1560 #   define USE_PUSH_MARKED_ACCELERATORS
1561 #   define PUSH_GRANULE(q) \
1562 		{ ptr_t qcontents = (ptr_t)((q)[0]); \
1563 	          GC_PUSH_ONE_HEAP(qcontents, (q)); \
1564 		  qcontents = (ptr_t)((q)[1]); \
1565 	          GC_PUSH_ONE_HEAP(qcontents, (q)+1); }
1566 # elif GC_GRANULE_WORDS == 4
1567 #   define USE_PUSH_MARKED_ACCELERATORS
1568 #   define PUSH_GRANULE(q) \
1569 		{ ptr_t qcontents = (ptr_t)((q)[0]); \
1570 	          GC_PUSH_ONE_HEAP(qcontents, (q)); \
1571 		  qcontents = (ptr_t)((q)[1]); \
1572 	          GC_PUSH_ONE_HEAP(qcontents, (q)+1); \
1573 		  qcontents = (ptr_t)((q)[2]); \
1574 	          GC_PUSH_ONE_HEAP(qcontents, (q)+2); \
1575 		  qcontents = (ptr_t)((q)[3]); \
1576 	          GC_PUSH_ONE_HEAP(qcontents, (q)+3); }
1577 # endif
1578 #endif
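
/* In the accelerated pushers below, each bit of a mark word		*/
/* corresponds to one granule.  The pushers test the bit at each	*/
/* object start and, if it is set, use PUSH_GRANULE to examine every	*/
/* word of the object as a candidate pointer.  One mark word thus	*/
/* covers WORDSZ granules, i.e. WORDSZ * GC_GRANULE_WORDS words of	*/
/* the block body.							*/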
1579 
1580 #ifdef USE_PUSH_MARKED_ACCELERATORS
1581 /* Push all objects reachable from marked objects in the given block */
1582 /* containing objects of size 1 granule.			     */
1583 void GC_push_marked1(struct hblk *h, hdr *hhdr)
1584 {
1585     word * mark_word_addr = &(hhdr->hb_marks[0]);
1586     word *p;
1587     word *plim;
1588     word *q;
1589     word mark_word;
1590 
    /* Allow registers to be used for some frequently accessed	*/
    /* global variables.  Otherwise aliasing issues are likely	*/
    /* to prevent the compiler from keeping them in registers.	*/
1594     ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1595     ptr_t least_ha = GC_least_plausible_heap_addr;
1596     mse * mark_stack_top = GC_mark_stack_top;
1597     mse * mark_stack_limit = GC_mark_stack_limit;
1598 #   define GC_mark_stack_top mark_stack_top
1599 #   define GC_mark_stack_limit mark_stack_limit
1600 #   define GC_greatest_plausible_heap_addr greatest_ha
1601 #   define GC_least_plausible_heap_addr least_ha
1602 
1603     p = (word *)(h->hb_body);
1604     plim = (word *)(((word)h) + HBLKSIZE);
1605 
    /* go through the block, one mark word at a time */
1607 	while( p < plim )  {
1608 	    mark_word = *mark_word_addr++;
1609 	    q = p;
1610 	    while(mark_word != 0) {
1611 	      if (mark_word & 1) {
1612 		  PUSH_GRANULE(q);
1613 	      }
1614 	      q += GC_GRANULE_WORDS;
1615 	      mark_word >>= 1;
1616 	    }
1617 	    p += WORDSZ*GC_GRANULE_WORDS;
1618 	}
1619 
1620 #   undef GC_greatest_plausible_heap_addr
1621 #   undef GC_least_plausible_heap_addr
1622 #   undef GC_mark_stack_top
1623 #   undef GC_mark_stack_limit
1624 
1625     GC_mark_stack_top = mark_stack_top;
1626 }
1627 
1628 
1629 #ifndef UNALIGNED
1630 
/* Push all objects reachable from marked objects in the given block */
/* containing objects of size 2 granules.			     */
1633 void GC_push_marked2(struct hblk *h, hdr *hhdr)
1634 {
1635     word * mark_word_addr = &(hhdr->hb_marks[0]);
1636     word *p;
1637     word *plim;
1638     word *q;
1639     word mark_word;
1640 
1641     ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1642     ptr_t least_ha = GC_least_plausible_heap_addr;
1643     mse * mark_stack_top = GC_mark_stack_top;
1644     mse * mark_stack_limit = GC_mark_stack_limit;
1645 
1646 #   define GC_mark_stack_top mark_stack_top
1647 #   define GC_mark_stack_limit mark_stack_limit
1648 #   define GC_greatest_plausible_heap_addr greatest_ha
1649 #   define GC_least_plausible_heap_addr least_ha
1650 
1651     p = (word *)(h->hb_body);
1652     plim = (word *)(((word)h) + HBLKSIZE);
1653 
    /* go through the block, one mark word at a time */
1655 	while( p < plim )  {
1656 	    mark_word = *mark_word_addr++;
1657 	    q = p;
1658 	    while(mark_word != 0) {
1659 	      if (mark_word & 1) {
1660 		  PUSH_GRANULE(q);
1661 		  PUSH_GRANULE(q + GC_GRANULE_WORDS);
1662 	      }
1663 	      q += 2 * GC_GRANULE_WORDS;
1664 	      mark_word >>= 2;
1665 	    }
1666 	    p += WORDSZ*GC_GRANULE_WORDS;
1667 	}
1668 
1669 #   undef GC_greatest_plausible_heap_addr
1670 #   undef GC_least_plausible_heap_addr
1671 #   undef GC_mark_stack_top
1672 #   undef GC_mark_stack_limit
1673 
1674     GC_mark_stack_top = mark_stack_top;
1675 }
1676 
1677 # if GC_GRANULE_WORDS < 4
/* Push all objects reachable from marked objects in the given block */
/* containing objects of size 4 granules.			      */
/* There is a risk of mark stack overflow here, but we handle that.  */
/* Only previously unmarked objects get pushed, so it is unlikely.   */
1682 void GC_push_marked4(struct hblk *h, hdr *hhdr)
1683 {
1684     word * mark_word_addr = &(hhdr->hb_marks[0]);
1685     word *p;
1686     word *plim;
1687     word *q;
1688     word mark_word;
1689 
1690     ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1691     ptr_t least_ha = GC_least_plausible_heap_addr;
1692     mse * mark_stack_top = GC_mark_stack_top;
1693     mse * mark_stack_limit = GC_mark_stack_limit;
1694 #   define GC_mark_stack_top mark_stack_top
1695 #   define GC_mark_stack_limit mark_stack_limit
1696 #   define GC_greatest_plausible_heap_addr greatest_ha
1697 #   define GC_least_plausible_heap_addr least_ha
1698 
1699     p = (word *)(h->hb_body);
1700     plim = (word *)(((word)h) + HBLKSIZE);
1701 
    /* go through the block, one mark word at a time */
1703 	while( p < plim )  {
1704 	    mark_word = *mark_word_addr++;
1705 	    q = p;
1706 	    while(mark_word != 0) {
1707 	      if (mark_word & 1) {
1708 		  PUSH_GRANULE(q);
1709 		  PUSH_GRANULE(q + GC_GRANULE_WORDS);
1710 		  PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
1711 		  PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
1712 	      }
1713 	      q += 4 * GC_GRANULE_WORDS;
1714 	      mark_word >>= 4;
1715 	    }
1716 	    p += WORDSZ*GC_GRANULE_WORDS;
1717 	}
1718 #   undef GC_greatest_plausible_heap_addr
1719 #   undef GC_least_plausible_heap_addr
1720 #   undef GC_mark_stack_top
1721 #   undef GC_mark_stack_limit
1722     GC_mark_stack_top = mark_stack_top;
1723 }
1724 
1725 #endif /* GC_GRANULE_WORDS < 4 */
1726 
1727 #endif /* UNALIGNED */
1728 
1729 #endif /* USE_PUSH_MARKED_ACCELERATORS */
1730 
1731 /* Push all objects reachable from marked objects in the given block */
1732 void GC_push_marked(struct hblk *h, hdr *hhdr)
1733 {
1734     size_t sz = hhdr -> hb_sz;
1735     word descr = hhdr -> hb_descr;
1736     ptr_t p;
1737     word bit_no;
1738     ptr_t lim;
1739     mse * GC_mark_stack_top_reg;
1740     mse * mark_stack_limit = GC_mark_stack_limit;
1741 
1742     /* Some quick shortcuts: */
1743 	if ((0 | GC_DS_LENGTH) == descr) return;
1744         if (GC_block_empty(hhdr)/* nothing marked */) return;
1745     GC_n_rescuing_pages++;
1746     GC_objects_are_marked = TRUE;
1747     if (sz > MAXOBJBYTES) {
1748         lim = h -> hb_body;
1749     } else {
1750         lim = (h + 1)->hb_body - sz;
1751     }
1752 
1753     switch(BYTES_TO_GRANULES(sz)) {
1754 #   if defined(USE_PUSH_MARKED_ACCELERATORS)
1755      case 1:
1756        GC_push_marked1(h, hhdr);
1757        break;
1758 #    if !defined(UNALIGNED)
1759        case 2:
1760          GC_push_marked2(h, hhdr);
1761          break;
1762 #     if GC_GRANULE_WORDS < 4
1763        case 4:
1764          GC_push_marked4(h, hhdr);
1765          break;
1766 #     endif
1767 #    endif
1768 #   endif
1769      default:
1770       GC_mark_stack_top_reg = GC_mark_stack_top;
1771       for (p = h -> hb_body, bit_no = 0; p <= lim;
1772 	   p += sz, bit_no += MARK_BIT_OFFSET(sz)) {
1773          if (mark_bit_from_hdr(hhdr, bit_no)) {
1774            /* Mark from fields inside the object */
1775              PUSH_OBJ(p, hhdr, GC_mark_stack_top_reg, mark_stack_limit);
1776          }
1777       }
1778       GC_mark_stack_top = GC_mark_stack_top_reg;
1779     }
1780 }
1781 
1782 #ifndef SMALL_CONFIG
1783 /* Test whether any page in the given block is dirty	*/
1784 GC_bool GC_block_was_dirty(struct hblk *h, hdr *hhdr)
1785 {
1786     size_t sz = hhdr -> hb_sz;
1787 
1788     if (sz <= MAXOBJBYTES) {
1789          return(GC_page_was_dirty(h));
1790     } else {
1791     	 ptr_t p = (ptr_t)h;
1792          while (p < (ptr_t)h + sz) {
1793              if (GC_page_was_dirty((struct hblk *)p)) return(TRUE);
1794              p += HBLKSIZE;
1795          }
1796          return(FALSE);
1797     }
1798 }
1799 #endif /* SMALL_CONFIG */
1800 
/* Similar to GC_push_marked, but skip over unallocated blocks and	*/
/* return the address of the next plausible block.			*/
1802 struct hblk * GC_push_next_marked(struct hblk *h)
1803 {
1804     hdr * hhdr = HDR(h);
1805 
1806     if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)) {
1807       h = GC_next_used_block(h);
1808       if (h == 0) return(0);
1809       hhdr = GC_find_header((ptr_t)h);
1810     }
1811     GC_push_marked(h, hhdr);
1812     return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
1813 }
1814 
1815 #ifndef SMALL_CONFIG
/* Similar to above, but mark only from dirty pages	*/
1817 struct hblk * GC_push_next_marked_dirty(struct hblk *h)
1818 {
1819     hdr * hhdr = HDR(h);
1820 
1821     if (!GC_dirty_maintained) { ABORT("dirty bits not set up"); }
1822     for (;;) {
1823 	if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)) {
1824           h = GC_next_used_block(h);
1825           if (h == 0) return(0);
1826           hhdr = GC_find_header((ptr_t)h);
1827 	}
1828 #	ifdef STUBBORN_ALLOC
1829           if (hhdr -> hb_obj_kind == STUBBORN) {
1830             if (GC_page_was_changed(h) && GC_block_was_dirty(h, hhdr)) {
1831                 break;
1832             }
1833           } else {
1834             if (GC_block_was_dirty(h, hhdr)) break;
1835           }
1836 #	else
1837 	  if (GC_block_was_dirty(h, hhdr)) break;
1838 #	endif
1839         h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
1840 	hhdr = HDR(h);
1841     }
1842     GC_push_marked(h, hhdr);
1843     return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
1844 }
1845 #endif
1846 
1847 /* Similar to above, but for uncollectable pages.  Needed since we	*/
1848 /* do not clear marks for such pages, even for full collections.	*/
1849 struct hblk * GC_push_next_marked_uncollectable(struct hblk *h)
1850 {
1851     hdr * hhdr = HDR(h);
1852 
1853     for (;;) {
1854 	if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)) {
1855           h = GC_next_used_block(h);
1856           if (h == 0) return(0);
1857           hhdr = GC_find_header((ptr_t)h);
1858 	}
1859 	if (hhdr -> hb_obj_kind == UNCOLLECTABLE) break;
1860         h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
1861 	hhdr = HDR(h);
1862     }
1863     GC_push_marked(h, hhdr);
1864     return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
1865 }
1866 
1867