/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose,  provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 */

/* Private declarations of GC marker data structures and macros */

/*
 * Declarations of the mark stack.  Needed by the marker and by
 * client-supplied mark routines.  Transitively includes gc_priv.h.
 * (Note that gc_priv.h should not be included before this, since this
 * includes dbg_mlc.h, which wants to include gc_priv.h AFTER defining
 * I_HIDE_POINTERS.)
 */
#ifndef GC_PMARK_H
# define GC_PMARK_H

# if defined(KEEP_BACK_PTRS) || defined(PRINT_BLACK_LIST)
#   include "dbg_mlc.h"
# endif
# ifndef GC_MARK_H
#   include "../gc_mark.h"
# endif
# ifndef GC_PRIVATE_H
#   include "gc_priv.h"
# endif

/* The real declaration of the following is in gc_priv.h, so that	*/
/* we can avoid scanning the following table.				*/
/*
extern mark_proc GC_mark_procs[MAX_MARK_PROCS];
*/

/*
 * Mark descriptor stuff that should remain private for now, mostly
 * because it's hard to export WORDSZ without including gcconfig.h.
 */
# define BITMAP_BITS (WORDSZ - GC_DS_TAG_BITS)
# define PROC(descr) \
	(GC_mark_procs[((descr) >> GC_DS_TAG_BITS) & (GC_MAX_MARK_PROCS-1)])
# define ENV(descr) \
	((descr) >> (GC_DS_TAG_BITS + GC_LOG_MAX_MARK_PROCS))
# define MAX_ENV \
  	(((word)1 << (WORDSZ - GC_DS_TAG_BITS - GC_LOG_MAX_MARK_PROCS)) - 1)
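
/* For illustration: a procedure descriptor built by the public	*/
/* GC_MAKE_PROC(proc_index, env) macro in gc_mark.h carries the	*/
/* GC_DS_PROC tag and decomposes as					*/
/*	PROC(GC_MAKE_PROC(i, e)) == GC_mark_procs[i]			*/
/*	ENV(GC_MAKE_PROC(i, e))  == e	(provided e <= MAX_ENV)		*/
/* since GC_MAKE_PROC packs env above the proc index, which in turn	*/
/* sits above the tag bits.						*/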


extern unsigned GC_n_mark_procs;

/* Number of mark stack entries to discard on overflow.	*/
#define GC_MARK_STACK_DISCARDS (INITIAL_MARK_STACK_SIZE/8)

typedef struct GC_ms_entry {
    ptr_t mse_start;   /* First word of object.			*/
    GC_word mse_descr;	/* Descriptor; low order two bits are tags,	*/
    			/* identifying the remaining bits as one of	*/
    			/* the GC_DS_* descriptor forms defined in	*/
    			/* gc_mark.h.					*/
} mse;

extern size_t GC_mark_stack_size;

extern mse * GC_mark_stack_limit;

#ifdef PARALLEL_MARK
  extern mse * volatile GC_mark_stack_top;
#else
  extern mse * GC_mark_stack_top;
#endif

extern mse * GC_mark_stack;

#ifdef PARALLEL_MARK
    /*
     * Allow multiple threads to participate in the marking process.
     * This works roughly as follows:
     *  The main mark stack never shrinks, but it can grow.
     *
     *	The initiating thread holds the GC lock, and sets GC_help_wanted.
     *
     *  Other threads:
     *     1) Update helper_count (while holding mark_lock).
     *	   2) Allocate a local mark stack.
     *     Repeatedly:
     *		3) Steal a global mark stack entry by atomically replacing
     *		   its descriptor with 0.
     *		4) Copy it to the local stack.
     *	        5) Mark on the local stack until it is empty, or until
     *		   it may be profitable to copy part of it back.
     *	        6) If necessary, copy the local stack to the global one,
     *		   while holding the mark lock.
     *    7) Stop when the global mark stack is empty.
     *    8) Decrement helper_count (holding mark_lock).
     *
     * This is an experiment to see if we can do something along the lines
     * of the University of Tokyo SGC in a less intrusive, though probably
     * also less performant, way.
     */
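    /*
     * A rough sketch of the helper side of this protocol, with
     * hypothetical helper-local names (local_stack, steal, mark_locally,
     * return_excess); the real logic lives in the marker (see
     * GC_help_marker and GC_mark_local in mark.c):
     *
     *		GC_acquire_mark_lock();
     *		++GC_helper_count;			// step 1
     *		GC_release_mark_lock();
     *		mse local_stack[N];			// step 2
     *		while (global stack nonempty) {
     *		    steal(local_stack);			// steps 3 and 4
     *		    mark_locally(local_stack);		// step 5
     *		    if (too full) return_excess(local_stack);	// step 6
     *		}
     *		GC_acquire_mark_lock();
     *		--GC_helper_count;			// step 8
     *		GC_release_mark_lock();
     */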
    void GC_do_parallel_mark();
		/* Initiate parallel marking.	*/

    extern GC_bool GC_help_wanted;	/* Protected by mark lock	*/
    extern unsigned GC_helper_count;	/* Number of running helpers.	*/
					/* Protected by mark lock	*/
    extern unsigned GC_active_count;	/* Number of active helpers.	*/
					/* Protected by mark lock	*/
					/* May increase and decrease	*/
					/* within each mark cycle.  But	*/
					/* once it returns to 0, it	*/
					/* stays zero for the cycle.	*/
    /* GC_mark_stack_top is also protected by mark lock.	*/
    /*
     * GC_notify_all_marker() is used when GC_help_wanted is first set,
     * when the last helper becomes inactive,
     * when something is added to the global mark stack, and just after
     * GC_mark_no is incremented.
     * This could be split into multiple CVs (and probably should be, to
     * scale to really large numbers of processors).
     */
#endif /* PARALLEL_MARK */

/* Return a pointer to within the first page of the object.	*/
/* Set *new_hdr_p to the corresponding header.			*/
ptr_t GC_find_start(ptr_t current, hdr *hhdr, hdr **new_hdr_p);

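/* Signal a mark stack overflow: note that a larger mark stack is	*/
/* needed, discard roughly GC_MARK_STACK_DISCARDS entries, and return	*/
/* the new mark stack top.						*/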
mse * GC_signal_mark_stack_overflow(mse *msp);

/* Push the object obj with corresponding heap block header hhdr onto	*/
/* the mark stack.  Objects with a null descriptor (pointer-free	*/
/* objects) are not pushed.						*/
# define PUSH_OBJ(obj, hhdr, mark_stack_top, mark_stack_limit) \
{ \
    register word _descr = (hhdr) -> hb_descr; \
        \
    if (_descr != 0) { \
        mark_stack_top++; \
        if (mark_stack_top >= mark_stack_limit) { \
          mark_stack_top = GC_signal_mark_stack_overflow(mark_stack_top); \
        } \
        mark_stack_top -> mse_start = (obj); \
        mark_stack_top -> mse_descr = _descr; \
    } \
}

/* Push the contents of current onto the mark stack if it is a valid	*/
/* ptr to a currently unmarked object.  Mark it.			*/
/* If we assumed a standard-conforming compiler, we could probably	*/
/* generate the exit_label transparently.				*/
# define PUSH_CONTENTS(current, mark_stack_top, mark_stack_limit, \
		       source, exit_label) \
{ \
    hdr * my_hhdr; \
 \
    HC_GET_HDR(current, my_hhdr, source, exit_label); \
    PUSH_CONTENTS_HDR(current, mark_stack_top, mark_stack_limit, \
		  source, exit_label, my_hhdr, TRUE);	\
exit_label: ; \
}
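
/* Each expansion of PUSH_CONTENTS defines exit_label, so every use	*/
/* within a single function must supply a distinct label, e.g.		*/
/* (illustrative):							*/
/*	PUSH_CONTENTS(q, mark_stack_top, mark_stack_limit, p, lbl1);	*/
/*	PUSH_CONTENTS(r, mark_stack_top, mark_stack_limit, p, lbl2);	*/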

/* Set mark bit, exit if it was already set.	*/

# ifdef USE_MARK_BITS
#   ifdef PARALLEL_MARK
      /* The following may fail to exit even if the bit was already set.    */
      /* For our uses, that's benign:                                       */
#     define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
        { \
          if (!(*(addr) & (bits))) { \
            AO_or((volatile AO_t *)(addr), (AO_t)(bits)); \
          } else { \
            goto exit_label; \
          } \
        }
#   else
#     define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
        { \
           word old = *(addr); \
           word my_bits = (bits); \
           if (old & my_bits) goto exit_label; \
           *(addr) = (old | my_bits); \
         }
#   endif /* !PARALLEL_MARK */
#   define SET_MARK_BIT_EXIT_IF_SET(hhdr,bit_no,exit_label) \
    { \
        word * mark_word_addr = hhdr -> hb_marks + divWORDSZ(bit_no); \
      \
        OR_WORD_EXIT_IF_SET(mark_word_addr, (word)1 << modWORDSZ(bit_no), \
                            exit_label); \
    }
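    /* E.g. with WORDSZ == 32, bit_no 37 selects bit 5 of hb_marks[1],	*/
    /* since divWORDSZ(37) == 1 and modWORDSZ(37) == 5.			*/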
# endif


#ifdef USE_MARK_BYTES
# if defined(I386) && defined(__GNUC__)
#  define LONG_MULT(hprod, lprod, x, y) { \
	asm("mull %2" : "=a"(lprod), "=d"(hprod) : "g"(y), "0"(x)); \
   }
# else /* No in-line X86 assembly code */
#  define LONG_MULT(hprod, lprod, x, y) { \
	unsigned long long prod = (unsigned long long)(x) \
				  * (unsigned long long)(y); \
	hprod = prod >> 32;  \
	lprod = (unsigned32)prod;  \
   }
# endif
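
/* LONG_MULT computes the full 64-bit product of two 32-bit operands;	*/
/* e.g. x == 0x10000 and y == 0x30000 yield hprod == 3, lprod == 0.	*/
/* The MARK_BIT_PER_OBJ code below uses it to divide by the object	*/
/* size: with inv_sz on the order of 2**32/sz, the high word of	*/
/* displ * inv_sz approximates displ/sz, the object index within the	*/
/* block.								*/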

  /* There is a race here, and we may set				*/
  /* the bit twice in the concurrent case.  This can result in the	*/
  /* object being pushed twice.  But that's only a performance issue.	*/
# define SET_MARK_BIT_EXIT_IF_SET(hhdr,bit_no,exit_label) \
    { \
        char * mark_byte_addr = (char *)hhdr -> hb_marks + (bit_no); \
        char mark_byte = *mark_byte_addr; \
          \
	if (mark_byte) goto exit_label; \
	*mark_byte_addr = 1;  \
    }
#endif /* USE_MARK_BYTES */

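/* Increment the count of marked objects in the block.  In the		*/
/* PARALLEL_MARK case the load/store pair is not an atomic increment,	*/
/* so a concurrent update may occasionally be lost; hb_n_marks is	*/
/* treated as approximate in that case anyway.				*/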
#ifdef PARALLEL_MARK
# define INCR_MARKS(hhdr) \
	AO_store(&(hhdr -> hb_n_marks), AO_load(&(hhdr -> hb_n_marks))+1)
#else
# define INCR_MARKS(hhdr) ++(hhdr -> hb_n_marks)
#endif

#ifdef ENABLE_TRACE
# define TRACE(source, cmd) \
	if (GC_trace_addr != 0 && (ptr_t)(source) == GC_trace_addr) cmd
# define TRACE_TARGET(target, cmd) \
	if (GC_trace_addr != 0 && (target) == *(ptr_t *)GC_trace_addr) cmd
#else
# define TRACE(source, cmd)
# define TRACE_TARGET(target, cmd)
#endif
/* If the mark bit corresponding to current is not set, set it, and	*/
/* push the contents of the object on the mark stack.  Current points	*/
/* to the beginning of the object.  We rely on the fact that the	*/
/* preceding header calculation will succeed for a pointer past the	*/
/* first page of an object only if it is in fact a valid pointer	*/
/* to the object.  Thus we can omit the otherwise necessary tests	*/
/* here.  Note in particular that the "displ" value is the displacement */
/* from the beginning of the heap block, which may itself be in the	*/
/* interior of a large object.						*/
#ifdef MARK_BIT_PER_GRANULE
# define PUSH_CONTENTS_HDR(current, mark_stack_top, mark_stack_limit, \
		           source, exit_label, hhdr, do_offset_check) \
{ \
    size_t displ = HBLKDISPL(current); /* Displacement in block; in bytes. */\
    /* displ is always within range.  If current doesn't point to	*/ \
    /* first block, then we are in the all_interior_pointers case, and	*/ \
    /* it is safe to use any displacement value.			*/ \
    size_t gran_displ = BYTES_TO_GRANULES(displ); \
    size_t gran_offset = hhdr -> hb_map[gran_displ];	\
    size_t byte_offset = displ & (GRANULE_BYTES - 1); \
    ptr_t base = current;  \
    /* The following always fails for large block references. */ \
    if (EXPECT((gran_offset | byte_offset) != 0, FALSE))  { \
	if (hhdr -> hb_large_block) { \
	  /* gran_offset is bogus.	*/ \
	  size_t obj_displ; \
	  base = (ptr_t)(hhdr -> hb_block); \
	  obj_displ = (ptr_t)(current) - base;  \
	  if (obj_displ != displ) { \
	    GC_ASSERT(obj_displ < hhdr -> hb_sz); \
	    /* Must be in all_interior_pointer case, not first block */ \
	    /* already did validity check on cache miss.	     */ \
	    ; \
	  } else { \
	    if (do_offset_check && !GC_valid_offsets[obj_displ]) { \
	      GC_ADD_TO_BLACK_LIST_NORMAL(current, source); \
	      goto exit_label; \
	    } \
	  } \
	  gran_displ = 0; \
	  GC_ASSERT(hhdr -> hb_sz > HBLKSIZE || \
		    hhdr -> hb_block == HBLKPTR(current)); \
	  GC_ASSERT((ptr_t)(hhdr -> hb_block) <= (ptr_t) current); \
	} else { \
	  size_t obj_displ = GRANULES_TO_BYTES(gran_offset) \
		      	     + byte_offset; \
	  if (do_offset_check && !GC_valid_offsets[obj_displ]) { \
	    GC_ADD_TO_BLACK_LIST_NORMAL(current, source); \
	    goto exit_label; \
	  } \
	  gran_displ -= gran_offset; \
	  base -= obj_displ; \
	} \
    } \
    GC_ASSERT(hhdr == GC_find_header(base)); \
    GC_ASSERT(gran_displ % BYTES_TO_GRANULES(hhdr -> hb_sz) == 0); \
    TRACE(source, GC_log_printf("GC:%d: passed validity tests\n",GC_gc_no)); \
    SET_MARK_BIT_EXIT_IF_SET(hhdr, gran_displ, exit_label); \
    TRACE(source, GC_log_printf("GC:%d: previously unmarked\n",GC_gc_no)); \
    TRACE_TARGET(base, \
	GC_log_printf("GC:%d: marking %p from %p instead\n", GC_gc_no, \
		      base, source)); \
    INCR_MARKS(hhdr); \
    GC_STORE_BACK_PTR((ptr_t)source, base); \
    PUSH_OBJ(base, hhdr, mark_stack_top, mark_stack_limit); \
}
#endif /* MARK_BIT_PER_GRANULE */
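
/* Worked example with illustrative sizes: assume 16 byte granules and	*/
/* 48 byte (3 granule) objects, so hb_map holds 0,1,2,0,1,2,...  A	*/
/* pointer with displ == 80 gives gran_displ == 5, gran_offset == 2,	*/
/* byte_offset == 0, hence obj_displ == 32, and base is moved back to	*/
/* displ 48, the true start of the enclosing object.			*/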

#ifdef MARK_BIT_PER_OBJ
# define PUSH_CONTENTS_HDR(current, mark_stack_top, mark_stack_limit, \
		           source, exit_label, hhdr, do_offset_check) \
{ \
    size_t displ = HBLKDISPL(current); /* Displacement in block; in bytes. */\
    unsigned32 low_prod, high_prod, offset_fraction; \
    unsigned32 inv_sz = hhdr -> hb_inv_sz; \
    ptr_t base = current;  \
    LONG_MULT(high_prod, low_prod, displ, inv_sz); \
    /* The product is > and within sz_in_bytes of	*/ \
    /* displ * 2**32 / sz_in_bytes.			*/ \
    if (EXPECT(low_prod >> 16 != 0, FALSE))  { \
	/* FIXME: fails if the offset is a multiple of HBLKSIZE,	*/ \
	/* which becomes 0.						*/ \
	if (inv_sz == LARGE_INV_SZ) { \
	  size_t obj_displ; \
	  base = (ptr_t)(hhdr -> hb_block); \
	  obj_displ = (ptr_t)(current) - base;  \
	  if (obj_displ != displ) { \
	    GC_ASSERT(obj_displ < hhdr -> hb_sz); \
	    /* Must be in all_interior_pointer case, not first block */ \
	    /* already did validity check on cache miss.	     */ \
	    ; \
	  } else { \
	    if (do_offset_check && !GC_valid_offsets[obj_displ]) { \
	      GC_ADD_TO_BLACK_LIST_NORMAL(current, source); \
	      goto exit_label; \
	    } \
	  } \
	  GC_ASSERT(hhdr -> hb_sz > HBLKSIZE || \
		    hhdr -> hb_block == HBLKPTR(current)); \
	  GC_ASSERT((ptr_t)(hhdr -> hb_block) < (ptr_t) current); \
	} else { \
	  /* Accurate enough if HBLKSIZE <= 2**15.	*/ \
	  GC_ASSERT(HBLKSIZE <= (1 << 15)); \
	  size_t obj_displ = (((low_prod >> 16) + 1) * (hhdr -> hb_sz)) >> 16; \
	  if (do_offset_check && !GC_valid_offsets[obj_displ]) { \
	    GC_ADD_TO_BLACK_LIST_NORMAL(current, source); \
	    goto exit_label; \
	  } \
	  base -= obj_displ; \
	} \
    } \
    /* May get here for pointer to start of block not at	*/ \
    /* beginning of object.  If so, it's valid, and we're fine. */ \
    GC_ASSERT(high_prod >= 0 && high_prod <= HBLK_OBJS(hhdr -> hb_sz)); \
    TRACE(source, GC_log_printf("GC:%d: passed validity tests\n",GC_gc_no)); \
    SET_MARK_BIT_EXIT_IF_SET(hhdr, high_prod, exit_label); \
    TRACE(source, GC_log_printf("GC:%d: previously unmarked\n",GC_gc_no)); \
    TRACE_TARGET(base, \
	GC_log_printf("GC:%d: marking %p from %p instead\n", GC_gc_no, \
		      base, source)); \
    INCR_MARKS(hhdr); \
    GC_STORE_BACK_PTR((ptr_t)source, base); \
    PUSH_OBJ(base, hhdr, mark_stack_top, mark_stack_limit); \
}
#endif /* MARK_BIT_PER_OBJ */
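
/* Worked example: take sz == 48 and, for illustration,		*/
/* inv_sz == 0x5555556 (about 2**32/48).  A pointer with displ == 80	*/
/* yields the 64-bit product 0x5555556 * 80 == 0x1AAAAAAE0, so		*/
/* high_prod == 1 (the mark bit index), obj_displ computes to 32, and	*/
/* base moves back to displ 48, the start of object number 1.		*/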

#if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
#   define PUSH_ONE_CHECKED_STACK(p, source) \
	GC_mark_and_push_stack(p, (ptr_t)(source))
#else
#   define PUSH_ONE_CHECKED_STACK(p, source) \
	GC_mark_and_push_stack(p)
#endif

/*
 * Push a single value onto the mark stack.  Mark from the object
 * pointed to by p.  Invoke FIXUP_POINTER(p) before any further
 * processing.  p is considered valid even if it is an interior pointer.
 * Previously marked objects are not pushed.  Hence we make progress even
 * if the mark stack overflows.
 */

# if NEED_FIXUP_POINTER
    /* Try both the raw version and the fixed up one.	*/
#   define GC_PUSH_ONE_STACK(p, source) \
      if ((p) >= (ptr_t)GC_least_plausible_heap_addr 	\
	 && (p) < (ptr_t)GC_greatest_plausible_heap_addr) {	\
	 PUSH_ONE_CHECKED_STACK(p, source);	\
      } \
      FIXUP_POINTER(p); \
      if ((p) >= (ptr_t)GC_least_plausible_heap_addr 	\
	 && (p) < (ptr_t)GC_greatest_plausible_heap_addr) {	\
	 PUSH_ONE_CHECKED_STACK(p, source);	\
      }
# else /* !NEED_FIXUP_POINTER */
#   define GC_PUSH_ONE_STACK(p, source) \
      if ((ptr_t)(p) >= (ptr_t)GC_least_plausible_heap_addr 	\
	 && (ptr_t)(p) < (ptr_t)GC_greatest_plausible_heap_addr) {	\
	 PUSH_ONE_CHECKED_STACK(p, source);	\
      }
# endif


/*
 * As above, but with interior pointer recognition handled as for
 * normal heap pointers.
 */
# define GC_PUSH_ONE_HEAP(p,source) \
    FIXUP_POINTER(p); \
    if ((p) >= (ptr_t)GC_least_plausible_heap_addr 	\
	 && (p) < (ptr_t)GC_greatest_plausible_heap_addr) {	\
	    GC_mark_stack_top = GC_mark_and_push( \
			    (void *)(p), GC_mark_stack_top, \
			    GC_mark_stack_limit, (void * *)(source)); \
    }
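
/* E.g. a scan loop over a region of word-aligned pointers might do	*/
/* (illustrative):							*/
/*	word *p;							*/
/*	for (p = (word *)bottom; p < (word *)top; p++) {		*/
/*	    ptr_t q = (ptr_t)(*p);					*/
/*	    GC_PUSH_ONE_HEAP(q, p);					*/
/*	}								*/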

/* Mark starting at mark stack entry top (incl.) down to	*/
/* mark stack entry bottom (incl.).  Stop after performing	*/
/* about one page worth of work.  Return the new mark stack	*/
/* top entry.							*/
mse * GC_mark_from(mse * top, mse * bottom, mse *limit);

#define MARK_FROM_MARK_STACK() \
	GC_mark_stack_top = GC_mark_from(GC_mark_stack_top, \
					 GC_mark_stack, \
					 GC_mark_stack + GC_mark_stack_size)

/*
 * Mark from one finalizable object using the specified
 * mark proc.  It may not mark the object pointed to by
 * real_ptr; that is the job of the caller, if appropriate.
 * Note that this is called with the mutator running, but
 * with us holding the allocation lock.  This is safe only if the
 * mutator needs the allocation lock to reveal hidden pointers.
 * FIXME: Why do we need the GC_mark_state test below?
 */
# define GC_MARK_FO(real_ptr, mark_proc) \
{ \
    (*(mark_proc))(real_ptr); \
    while (!GC_mark_stack_empty()) MARK_FROM_MARK_STACK(); \
    if (GC_mark_state != MS_NONE) { \
        GC_set_mark_bit(real_ptr); \
        while (!GC_mark_some((ptr_t)0)) {} \
    } \
}
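
/* Typical use, with a standard finalization mark proc such as		*/
/* GC_normal_finalize_mark_proc (illustrative):				*/
/*	GC_MARK_FO(real_ptr, GC_normal_finalize_mark_proc);		*/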

extern GC_bool GC_mark_stack_too_small;
				/* We need a larger mark stack.  May be	*/
				/* set by client supplied mark routines.*/

typedef int mark_state_t;	/* Current state of marking, as follows:*/
				/* Used to remember where we are during */
				/* concurrent marking.			*/

				/* We say something is dirty if it was	*/
				/* written since the last time we	*/
				/* retrieved dirty bits.  We say it's	*/
				/* grungy if it was marked dirty in the	*/
				/* last set of bits we retrieved.	*/

				/* Invariant I: all roots and marked	*/
				/* objects p are either dirty, or point */
				/* to objects q that are either marked 	*/
				/* or a pointer to q appears in a range	*/
				/* on the mark stack.			*/

# define MS_NONE 0		/* No marking in progress. I holds.	*/
				/* Mark stack is empty.			*/

# define MS_PUSH_RESCUERS 1	/* Rescuing objects are currently 	*/
				/* being pushed.  I holds, except	*/
				/* that grungy roots may point to 	*/
				/* unmarked objects, as may marked	*/
				/* grungy objects above scan_ptr.	*/

# define MS_PUSH_UNCOLLECTABLE 2
				/* I holds, except that marked 		*/
				/* uncollectable objects above scan_ptr */
				/* may point to unmarked objects.	*/
				/* Roots may point to unmarked objects.	*/

# define MS_ROOTS_PUSHED 3	/* I holds, mark stack may be nonempty  */

# define MS_PARTIALLY_INVALID 4	/* I may not hold, e.g. because of mark	*/
				/* stack overflow.  However, marked heap */
				/* objects below scan_ptr point to	*/
				/* marked or stacked objects.		*/

# define MS_INVALID 5		/* I may not hold.			*/

extern mark_state_t GC_mark_state;
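
/* In a typical cycle the state advances from MS_NONE to		*/
/* MS_PUSH_RESCUERS (or MS_PUSH_UNCOLLECTABLE), then to		*/
/* MS_ROOTS_PUSHED, and back to MS_NONE; mark stack overflow drops it	*/
/* to MS_PARTIALLY_INVALID or MS_INVALID, from which marking must	*/
/* recover before completing.						*/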

#endif  /* GC_PMARK_H */