/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1995 by Xerox Corporation. All rights reserved.
 * Copyright (c) 2000 by Hewlett-Packard Company. All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 */

#include "private/gc_pmark.h"

#ifndef MAX_MARKERS
# define MAX_MARKERS 16
#endif

#include <stdio.h>

#if defined(MSWIN32) && defined(__GNUC__)
# include <excpt.h>
#endif

/* Make arguments appear live to compiler.  Put here to minimize the   */
/* risk of inlining.  Used to minimize junk left in registers.         */
void GC_noop6(word arg1 GC_ATTR_UNUSED, word arg2 GC_ATTR_UNUSED,
              word arg3 GC_ATTR_UNUSED, word arg4 GC_ATTR_UNUSED,
              word arg5 GC_ATTR_UNUSED, word arg6 GC_ATTR_UNUSED)
{
  /* Empty */
}

/* Single argument version, robust against whole program analysis. */
volatile word GC_noop_sink;
GC_API void GC_CALL GC_noop1(word x)
{
    GC_noop_sink = x;
}
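
/* A minimal usage sketch (illustrative, not part of the collector):   */
/* clients can call GC_noop1 to keep a value observably live across a  */
/* point where an optimizing compiler might otherwise discard it.  The */
/* variable name below is hypothetical.                                */
#if 0
  {
    void *result = GC_malloc(16);
    /* ... use *result ... */
    GC_noop1((word)result);     /* keep result live up to this point */
  }
#endif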

/* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */

GC_INNER unsigned GC_n_mark_procs = GC_RESERVED_MARK_PROCS;

/* Initialize GC_obj_kinds and the standard free lists properly.       */
/* This must be done statically since they may be accessed before      */
/* GC_init is called.                                                  */
/* It's done here, since we need to deal with mark descriptors.        */
GC_INNER struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
/* PTRFREE */ { &GC_aobjfreelist[0], 0 /* filled in dynamically */,
                /* 0 | */ GC_DS_LENGTH, FALSE, FALSE
                /*, */ OK_DISCLAIM_INITZ },
/* NORMAL */  { &GC_objfreelist[0], 0,
                /* 0 | */ GC_DS_LENGTH,
                                /* adjusted in GC_init for EXTRA_BYTES */
                TRUE /* add length to descr */, TRUE
                /*, */ OK_DISCLAIM_INITZ },
/* UNCOLLECTABLE */
              { &GC_uobjfreelist[0], 0,
                /* 0 | */ GC_DS_LENGTH, TRUE /* add length to descr */, TRUE
                /*, */ OK_DISCLAIM_INITZ },
# ifdef ATOMIC_UNCOLLECTABLE
/* AUNCOLLECTABLE */
              { &GC_auobjfreelist[0], 0,
                /* 0 | */ GC_DS_LENGTH, FALSE /* add length to descr */, FALSE
                /*, */ OK_DISCLAIM_INITZ },
# endif
# ifdef STUBBORN_ALLOC
/*STUBBORN*/  { (void **)&GC_sobjfreelist[0], 0,
                /* 0 | */ GC_DS_LENGTH, TRUE /* add length to descr */, TRUE
                /*, */ OK_DISCLAIM_INITZ },
# endif
};

# ifdef ATOMIC_UNCOLLECTABLE
#   ifdef STUBBORN_ALLOC
#     define GC_N_KINDS_INITIAL_VALUE 5
#   else
#     define GC_N_KINDS_INITIAL_VALUE 4
#   endif
# else
#   ifdef STUBBORN_ALLOC
#     define GC_N_KINDS_INITIAL_VALUE 4
#   else
#     define GC_N_KINDS_INITIAL_VALUE 3
#   endif
# endif

GC_INNER unsigned GC_n_kinds = GC_N_KINDS_INITIAL_VALUE;

# ifndef INITIAL_MARK_STACK_SIZE
#   define INITIAL_MARK_STACK_SIZE (1*HBLKSIZE)
                /* INITIAL_MARK_STACK_SIZE * sizeof(mse) should be a   */
                /* multiple of HBLKSIZE.                               */
                /* The incremental collector actually likes a larger   */
                /* size, since it wants to push all marked dirty objs  */
                /* before marking anything new.  Currently we let it   */
                /* grow dynamically.                                   */
# endif

STATIC word GC_n_rescuing_pages = 0;
                                /* Number of dirty pages we marked from; */
                                /* excludes ptrfree pages, etc.          */

GC_INNER size_t GC_mark_stack_size = 0;

#ifdef PARALLEL_MARK
  STATIC volatile AO_t GC_first_nonempty = 0;
                        /* Lowest entry on mark stack   */
                        /* that may be nonempty.        */
                        /* Updated only by initiating   */
                        /* thread.                      */
#endif

GC_INNER mark_state_t GC_mark_state = MS_NONE;

GC_INNER GC_bool GC_mark_stack_too_small = FALSE;

static struct hblk * scan_ptr;

STATIC GC_bool GC_objects_are_marked = FALSE;
                /* Are there collectible marked objects in the heap?   */

/* Is a collection in progress?  Note that this can return true in the */
/* nonincremental case, if a collection has been abandoned and the     */
/* mark state is now MS_INVALID.                                       */
GC_INNER GC_bool GC_collection_in_progress(void)
{
    return(GC_mark_state != MS_NONE);
}

/* clear all mark bits in the header */
GC_INNER void GC_clear_hdr_marks(hdr *hhdr)
{
    size_t last_bit = FINAL_MARK_BIT(hhdr -> hb_sz);
    BZERO(hhdr -> hb_marks, sizeof(hhdr->hb_marks));
    set_mark_bit_from_hdr(hhdr, last_bit);
    hhdr -> hb_n_marks = 0;
}

/* Set all mark bits in the header.  Used for uncollectible blocks. */
GC_INNER void GC_set_hdr_marks(hdr *hhdr)
{
    unsigned i;
    size_t sz = hhdr -> hb_sz;
    unsigned n_marks = (unsigned)FINAL_MARK_BIT(sz);

#   ifdef USE_MARK_BYTES
      for (i = 0; i <= n_marks; i += (unsigned)MARK_BIT_OFFSET(sz)) {
        hhdr -> hb_marks[i] = 1;
      }
#   else
      for (i = 0; i < divWORDSZ(n_marks + WORDSZ); ++i) {
        hhdr -> hb_marks[i] = ONES;
      }
#   endif
#   ifdef MARK_BIT_PER_OBJ
      hhdr -> hb_n_marks = n_marks - 1;
#   else
      hhdr -> hb_n_marks = HBLK_OBJS(sz);
#   endif
}

/*
 * Clear all mark bits associated with block h.
 */
static void clear_marks_for_block(struct hblk *h, word dummy GC_ATTR_UNUSED)
{
    register hdr * hhdr = HDR(h);

    if (IS_UNCOLLECTABLE(hhdr -> hb_obj_kind)) return;
        /* Mark bit for these is cleared only once the object is       */
        /* explicitly deallocated.  This either frees the block, or    */
        /* the bit is cleared once the object is on the free list.     */
    GC_clear_hdr_marks(hhdr);
}

/* Slow but general routines for setting/clearing/asking about mark bits */
GC_API void GC_CALL GC_set_mark_bit(const void *p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);

    if (!mark_bit_from_hdr(hhdr, bit_no)) {
      set_mark_bit_from_hdr(hhdr, bit_no);
      ++hhdr -> hb_n_marks;
    }
}

GC_API void GC_CALL GC_clear_mark_bit(const void *p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);

    if (mark_bit_from_hdr(hhdr, bit_no)) {
      size_t n_marks;
      clear_mark_bit_from_hdr(hhdr, bit_no);
      n_marks = hhdr -> hb_n_marks - 1;
#     ifdef PARALLEL_MARK
        if (n_marks != 0 || !GC_parallel)
          hhdr -> hb_n_marks = n_marks;
        /* Don't decrement to zero.  The counts are approximate due to */
        /* concurrency issues, but we need to ensure that a count of   */
        /* zero implies an empty block.                                */
#     else
        hhdr -> hb_n_marks = n_marks;
#     endif
    }
}

GC_API int GC_CALL GC_is_marked(const void *p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);

    return (int)mark_bit_from_hdr(hhdr, bit_no); /* 0 or 1 */
}

/*
 * Clear mark bits in all allocated heap blocks.  This invalidates
 * the marker invariant, and sets GC_mark_state to reflect this.
 * (This implicitly starts marking to reestablish the invariant.)
 */
GC_INNER void GC_clear_marks(void)
{
    GC_apply_to_all_blocks(clear_marks_for_block, (word)0);
    GC_objects_are_marked = FALSE;
    GC_mark_state = MS_INVALID;
    scan_ptr = 0;
}

#ifdef CHECKSUMS
  void GC_check_dirty(void);
#endif

/* Initiate a garbage collection.  Initiates a full collection if the  */
/* mark state is invalid.                                              */
GC_INNER void GC_initiate_gc(void)
{
#   ifndef GC_DISABLE_INCREMENTAL
      if (GC_dirty_maintained) GC_read_dirty();
#   endif
#   ifdef STUBBORN_ALLOC
      GC_read_changed();
#   endif
#   ifdef CHECKSUMS
      if (GC_dirty_maintained) GC_check_dirty();
#   endif
    GC_n_rescuing_pages = 0;
    if (GC_mark_state == MS_NONE) {
      GC_mark_state = MS_PUSH_RESCUERS;
    } else if (GC_mark_state != MS_INVALID) {
      ABORT("Unexpected state");
    } /* else this is really a full collection, and mark        */
      /* bits are invalid.                                      */
    scan_ptr = 0;
}

#ifdef PARALLEL_MARK
  STATIC void GC_do_parallel_mark(void); /* initiate parallel marking. */
#endif /* PARALLEL_MARK */

#ifdef GC_DISABLE_INCREMENTAL
# define GC_push_next_marked_dirty(h) GC_push_next_marked(h)
#else
  STATIC struct hblk * GC_push_next_marked_dirty(struct hblk *h);
                /* Invoke GC_push_marked on next dirty block above h.  */
                /* Return a pointer just past the end of this block.   */
#endif /* !GC_DISABLE_INCREMENTAL */
STATIC struct hblk * GC_push_next_marked(struct hblk *h);
                /* Ditto, but also mark from clean pages.      */
STATIC struct hblk * GC_push_next_marked_uncollectable(struct hblk *h);
                /* Ditto, but mark only from uncollectible pages.      */

static void alloc_mark_stack(size_t);

# if (((defined(MSWIN32) || defined(MSWINCE)) && !defined(__GNUC__)) \
        || (defined(MSWIN32) && defined(I386)) /* for Win98 */ \
        || (defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS))) \
     && !defined(NO_WRAP_MARK_SOME)
    /* Under rare conditions, we may end up marking from nonexistent   */
    /* memory.  Hence we need to be prepared to recover by running     */
    /* GC_mark_some with a suitable handler in place.                  */
    /* FIXME: Do we really need it for WinCE?  If yes then             */
    /* WRAP_MARK_SOME should be also defined for CeGCC which requires  */
    /* CPU/OS-specific code in mark_ex_handler() and GC_mark_some()    */
    /* (for manual stack unwinding and exception handler installation).*/
#   define WRAP_MARK_SOME
# endif

/* Perform a small amount of marking.                   */
/* We try to touch roughly a page of memory.            */
/* Return TRUE if we just finished a mark phase.        */
/* Cold_gc_frame is an address inside a GC frame that   */
/* remains valid until all marking is complete.         */
/* A zero value indicates that it's OK to miss some     */
/* register values.                                     */
/* We hold the allocation lock.  In the case of         */
/* incremental collection, the world may not be stopped.*/
#ifdef WRAP_MARK_SOME
  /* For win32, this is called after we establish a structured */
  /* exception handler, in case Windows unmaps one of our root */
  /* segments.  See below.  In either case, we acquire the     */
  /* allocator lock long before we get here.                   */
  STATIC GC_bool GC_mark_some_inner(ptr_t cold_gc_frame)
#else
  GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame)
#endif
{
    switch(GC_mark_state) {
      case MS_NONE:
        break;

      case MS_PUSH_RESCUERS:
        if ((word)GC_mark_stack_top
            >= (word)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/2)) {
          /* Go ahead and mark, even though that might cause us to */
          /* see more marked dirty objects later on.  Avoid this   */
          /* in the future.                                        */
          GC_mark_stack_too_small = TRUE;
          MARK_FROM_MARK_STACK();
          break;
        } else {
          scan_ptr = GC_push_next_marked_dirty(scan_ptr);
          if (scan_ptr == 0) {
            GC_COND_LOG_PRINTF("Marked from %lu dirty pages\n",
                               (unsigned long)GC_n_rescuing_pages);
            GC_push_roots(FALSE, cold_gc_frame);
            GC_objects_are_marked = TRUE;
            if (GC_mark_state != MS_INVALID) {
              GC_mark_state = MS_ROOTS_PUSHED;
            }
          }
        }
        break;

      case MS_PUSH_UNCOLLECTABLE:
        if ((word)GC_mark_stack_top
            >= (word)(GC_mark_stack + GC_mark_stack_size/4)) {
#         ifdef PARALLEL_MARK
            /* Avoid this, since we don't parallelize the marker */
            /* here.                                             */
            if (GC_parallel) GC_mark_stack_too_small = TRUE;
#         endif
          MARK_FROM_MARK_STACK();
          break;
        } else {
          scan_ptr = GC_push_next_marked_uncollectable(scan_ptr);
          if (scan_ptr == 0) {
            GC_push_roots(TRUE, cold_gc_frame);
            GC_objects_are_marked = TRUE;
            if (GC_mark_state != MS_INVALID) {
              GC_mark_state = MS_ROOTS_PUSHED;
            }
          }
        }
        break;

      case MS_ROOTS_PUSHED:
#       ifdef PARALLEL_MARK
          /* In the incremental GC case, this currently doesn't */
          /* quite do the right thing, since it runs to         */
          /* completion.  On the other hand, starting a         */
          /* parallel marker is expensive, so perhaps it is     */
          /* the right thing?                                   */
          /* Eventually, incremental marking should run         */
          /* asynchronously in multiple threads, without        */
          /* grabbing the allocation lock.                      */
          if (GC_parallel) {
            GC_do_parallel_mark();
            GC_ASSERT((word)GC_mark_stack_top < (word)GC_first_nonempty);
            GC_mark_stack_top = GC_mark_stack - 1;
            if (GC_mark_stack_too_small) {
              alloc_mark_stack(2*GC_mark_stack_size);
            }
            if (GC_mark_state == MS_ROOTS_PUSHED) {
              GC_mark_state = MS_NONE;
              return(TRUE);
            }
            break;
          }
#       endif
        if ((word)GC_mark_stack_top >= (word)GC_mark_stack) {
          MARK_FROM_MARK_STACK();
          break;
        } else {
          GC_mark_state = MS_NONE;
          if (GC_mark_stack_too_small) {
            alloc_mark_stack(2*GC_mark_stack_size);
          }
          return(TRUE);
        }

      case MS_INVALID:
      case MS_PARTIALLY_INVALID:
        if (!GC_objects_are_marked) {
          GC_mark_state = MS_PUSH_UNCOLLECTABLE;
          break;
        }
        if ((word)GC_mark_stack_top >= (word)GC_mark_stack) {
          MARK_FROM_MARK_STACK();
          break;
        }
        if (scan_ptr == 0 && GC_mark_state == MS_INVALID) {
          /* About to start a heap scan for marked objects. */
          /* Mark stack is empty.  OK to reallocate.        */
          if (GC_mark_stack_too_small) {
            alloc_mark_stack(2*GC_mark_stack_size);
          }
          GC_mark_state = MS_PARTIALLY_INVALID;
        }
        scan_ptr = GC_push_next_marked(scan_ptr);
        if (scan_ptr == 0 && GC_mark_state == MS_PARTIALLY_INVALID) {
          GC_push_roots(TRUE, cold_gc_frame);
          GC_objects_are_marked = TRUE;
          if (GC_mark_state != MS_INVALID) {
            GC_mark_state = MS_ROOTS_PUSHED;
          }
        }
        break;

      default:
        ABORT("GC_mark_some: bad state");
    }
    return(FALSE);
}
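
/* An illustrative sketch (an assumption about typical use, not        */
/* collector code): callers drive the state machine above by invoking  */
/* GC_mark_some repeatedly until it reports that a mark phase has      */
/* completed.  GC_approx_sp() is used here merely as a plausible       */
/* cold_gc_frame value.                                                */
#if 0
  {
    ptr_t cold_gc_frame = GC_approx_sp();
    while (!GC_mark_some(cold_gc_frame)) {
      /* Each call performs roughly a page worth of marking work. */
    }
  }
#endif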

#ifdef WRAP_MARK_SOME

# if (defined(MSWIN32) || defined(MSWINCE)) && defined(__GNUC__)

    typedef struct {
      EXCEPTION_REGISTRATION ex_reg;
      void *alt_path;
    } ext_ex_regn;

    static EXCEPTION_DISPOSITION mark_ex_handler(
                                        struct _EXCEPTION_RECORD *ex_rec,
                                        void *est_frame,
                                        struct _CONTEXT *context,
                                        void *disp_ctxt GC_ATTR_UNUSED)
    {
        if (ex_rec->ExceptionCode == STATUS_ACCESS_VIOLATION) {
          ext_ex_regn *xer = (ext_ex_regn *)est_frame;

          /* Unwind from the inner function assuming the standard */
          /* function prologue.                                   */
          /* Assumes code has not been compiled with              */
          /* -fomit-frame-pointer.                                */
          context->Esp = context->Ebp;
          context->Ebp = *((DWORD *)context->Esp);
          context->Esp = context->Esp - 8;

          /* Resume execution at the "real" handler within the */
          /* wrapper function.                                  */
          context->Eip = (DWORD )(xer->alt_path);

          return ExceptionContinueExecution;

        } else {
          return ExceptionContinueSearch;
        }
    }
# endif /* __GNUC__ && MSWIN32 */

#if defined(GC_WIN32_THREADS) && !defined(__GNUC__)
  GC_INNER GC_bool GC_started_thread_while_stopped(void);
        /* In win32_threads.c.  Did we invalidate mark phase with an   */
        /* unexpected thread start?                                    */
#endif

  GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame)
  {
    GC_bool ret_val;

#   if defined(MSWIN32) || defined(MSWINCE)
#     ifndef __GNUC__
        /* Windows 98 appears to asynchronously create and remove  */
        /* writable memory mappings, for reasons we haven't yet    */
        /* understood.  Since we look for writable regions to      */
        /* determine the root set, we may try to mark from an      */
        /* address range that disappeared since we started the     */
        /* collection.  Thus we have to recover from faults here.  */
        /* This code does not appear to be necessary for Windows   */
        /* 95/NT/2000+.  Note that this code should never generate */
        /* an incremental GC write fault.                          */
        /* This code seems to be necessary for WinCE (at least in  */
        /* the case we'd decide to add MEM_PRIVATE sections to     */
        /* data roots in GC_register_dynamic_libraries()).         */
        /* It's conceivable that this is the same issue with       */
        /* terminating threads that we see with Linux and          */
        /* USE_PROC_FOR_LIBRARIES.                                 */

        __try {
          ret_val = GC_mark_some_inner(cold_gc_frame);
        } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
                  EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
          goto handle_ex;
        }
#       ifdef GC_WIN32_THREADS
          /* With DllMain-based thread tracking, a thread may have        */
          /* started while we were marking.  This is logically equivalent */
          /* to the exception case; our results are invalid and we have   */
          /* to start over.  This cannot be prevented since we can't      */
          /* block in DllMain.                                            */
          if (GC_started_thread_while_stopped()) goto handle_ex;
#       endif
      rm_handler:
        return ret_val;

#     else /* __GNUC__ */

        /* Manually install an exception handler since GCC does   */
        /* not yet support Structured Exception Handling (SEH) on */
        /* Win32.                                                  */

        ext_ex_regn er;

        er.alt_path = &&handle_ex;
        er.ex_reg.handler = mark_ex_handler;
        __asm__ __volatile__ ("movl %%fs:0, %0" : "=r" (er.ex_reg.prev));
        __asm__ __volatile__ ("movl %0, %%fs:0" : : "r" (&er));
        ret_val = GC_mark_some_inner(cold_gc_frame);
        /* Prevent GCC from considering the following code unreachable */
        /* and thus eliminating it.                                    */
        if (er.alt_path == 0)
          goto handle_ex;
      rm_handler:
        /* Uninstall the exception handler. */
        __asm__ __volatile__ ("mov %0, %%fs:0" : : "r" (er.ex_reg.prev));
        return ret_val;

#     endif /* __GNUC__ */
#   else /* !MSWIN32 */
      /* Here we are handling the case in which /proc is used for root */
      /* finding, and we have threads.  We may find a stack for a      */
      /* thread that is in the process of exiting, and disappears      */
      /* while we are marking it.  This seems extremely difficult to   */
      /* avoid otherwise.                                              */
      if (GC_incremental) {
        WARN("Incremental GC incompatible with /proc roots\n", 0);
        /* I'm not sure if this could still work ...  */
      }
      GC_setup_temporary_fault_handler();
      if(SETJMP(GC_jmp_buf) != 0) goto handle_ex;
      ret_val = GC_mark_some_inner(cold_gc_frame);
    rm_handler:
      GC_reset_fault_handler();
      return ret_val;

#   endif /* !MSWIN32 */

  handle_ex:
    /* Exception handler starts here for all cases. */
    WARN("Caught ACCESS_VIOLATION in marker;"
         " memory mapping disappeared\n", 0);

    /* We have bad roots on the stack.  Discard mark stack. */
    /* Rescan from marked objects.  Redetermine roots.      */
    GC_invalidate_mark_state();
    scan_ptr = 0;

    ret_val = FALSE;
    goto rm_handler; /* Back to platform-specific code. */
  }
#endif /* WRAP_MARK_SOME */

GC_INNER void GC_invalidate_mark_state(void)
{
    GC_mark_state = MS_INVALID;
    GC_mark_stack_top = GC_mark_stack-1;
}

GC_INNER mse * GC_signal_mark_stack_overflow(mse *msp)
{
    GC_mark_state = MS_INVALID;
#   ifdef PARALLEL_MARK
      /* We are using a local_mark_stack in parallel mode, so   */
      /* do not signal the global mark stack to be resized.     */
      /* That will be done if required in GC_return_mark_stack. */
      if (!GC_parallel)
        GC_mark_stack_too_small = TRUE;
#   else
      GC_mark_stack_too_small = TRUE;
#   endif
    GC_COND_LOG_PRINTF("Mark stack overflow; current size = %lu entries\n",
                       (unsigned long)GC_mark_stack_size);
    return(msp - GC_MARK_STACK_DISCARDS);
}

/*
 * Mark objects pointed to by the regions described by
 * mark stack entries between mark_stack and mark_stack_top,
 * inclusive.  Assumes the upper limit of a mark stack entry
 * is never 0.  A mark stack entry never has size 0.
 * We try to traverse on the order of a hblk of memory before we return.
 * Caller is responsible for calling this until the mark stack is empty.
 * Note that this is the most performance critical routine in the
 * collector.  Hence it contains all sorts of ugly hacks to speed
 * things up.  In particular, we avoid procedure calls on the common
 * path, we take advantage of peculiarities of the mark descriptor
 * encoding, we optionally maintain a cache for the block address to
 * header mapping, we prefetch when an object is "grayed", etc.
 */
GC_INNER mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack,
                            mse *mark_stack_limit)
{
  signed_word credit = HBLKSIZE;  /* Remaining credit for marking work. */
  ptr_t current_p;      /* Pointer to current candidate ptr.        */
  word current;         /* Candidate pointer.                       */
  ptr_t limit = 0;      /* (Incl) limit of current candidate range. */
  word descr;
  ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
  ptr_t least_ha = GC_least_plausible_heap_addr;
  DECLARE_HDR_CACHE;

# define SPLIT_RANGE_WORDS 128 /* Must be power of 2. */

  GC_objects_are_marked = TRUE;
  INIT_HDR_CACHE;
# ifdef OS2 /* Use untweaked version to circumvent compiler problem. */
    while ((word)mark_stack_top >= (word)mark_stack && credit >= 0)
# else
    while ((((ptr_t)mark_stack_top - (ptr_t)mark_stack) | credit) >= 0)
# endif
  {
    current_p = mark_stack_top -> mse_start;
    descr = mark_stack_top -> mse_descr.w;
  retry:
    /* current_p and descr describe the current object.                */
    /* *mark_stack_top is vacant.                                      */
    /* The following is 0 only for small objects described by a simple */
    /* length descriptor.  For many applications this is the common    */
    /* case, so we try to detect it quickly.                           */
    if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | GC_DS_TAGS)) {
      word tag = descr & GC_DS_TAGS;

      GC_STATIC_ASSERT(GC_DS_TAGS == 0x3);
      switch(tag) {
        case GC_DS_LENGTH:
          /* Large length.                                              */
          /* Process part of the range to avoid pushing too much on the */
          /* stack.                                                     */
          GC_ASSERT(descr < (word)GC_greatest_plausible_heap_addr
                            - (word)GC_least_plausible_heap_addr);
#         ifdef ENABLE_TRACE
            if ((word)GC_trace_addr >= (word)current_p
                && (word)GC_trace_addr < (word)(current_p + descr)) {
              GC_log_printf("GC #%u: large section; start %p, len %lu\n",
                        (unsigned)GC_gc_no, current_p, (unsigned long)descr);
            }
#         endif /* ENABLE_TRACE */
#         ifdef PARALLEL_MARK
#           define SHARE_BYTES 2048
            if (descr > SHARE_BYTES && GC_parallel
                && (word)mark_stack_top < (word)(mark_stack_limit - 1)) {
              int new_size = (descr/2) & ~(sizeof(word)-1);
              mark_stack_top -> mse_start = current_p;
              mark_stack_top -> mse_descr.w = new_size + sizeof(word);
                                        /* Makes sure we handle  */
                                        /* misaligned pointers.  */
              mark_stack_top++;
#             ifdef ENABLE_TRACE
                if ((word)GC_trace_addr >= (word)current_p
                    && (word)GC_trace_addr < (word)(current_p + descr)) {
                  GC_log_printf("GC #%u: splitting (parallel) %p at %p\n",
                        (unsigned)GC_gc_no, current_p, current_p + new_size);
                }
#             endif /* ENABLE_TRACE */
              current_p += new_size;
              descr -= new_size;
              goto retry;
            }
#         endif /* PARALLEL_MARK */
          mark_stack_top -> mse_start =
                limit = current_p + WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
          mark_stack_top -> mse_descr.w =
                                descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
#         ifdef ENABLE_TRACE
            if ((word)GC_trace_addr >= (word)current_p
                && (word)GC_trace_addr < (word)(current_p + descr)) {
              GC_log_printf("GC #%u: splitting %p at %p\n",
                            (unsigned)GC_gc_no, current_p, limit);
            }
#         endif /* ENABLE_TRACE */
          /* Make sure that pointers overlapping the two ranges are */
          /* considered.                                            */
          limit += sizeof(word) - ALIGNMENT;
          break;
        case GC_DS_BITMAP:
          mark_stack_top--;
#         ifdef ENABLE_TRACE
            if ((word)GC_trace_addr >= (word)current_p
                && (word)GC_trace_addr < (word)(current_p
                                                + WORDS_TO_BYTES(WORDSZ-2))) {
              GC_log_printf("GC #%u: tracing from %p bitmap descr %lu\n",
                            (unsigned)GC_gc_no, current_p,
                            (unsigned long)descr);
            }
#         endif /* ENABLE_TRACE */
          descr &= ~GC_DS_TAGS;
          credit -= WORDS_TO_BYTES(WORDSZ/2); /* guess */
          while (descr != 0) {
            if ((signed_word)descr < 0) {
              current = *(word *)current_p;
              FIXUP_POINTER(current);
              if (current >= (word)least_ha && current < (word)greatest_ha) {
                PREFETCH((ptr_t)current);
#               ifdef ENABLE_TRACE
                  if (GC_trace_addr == current_p) {
                    GC_log_printf("GC #%u: considering(3) %p -> %p\n",
                                  (unsigned)GC_gc_no, current_p,
                                  (ptr_t)current);
                  }
#               endif /* ENABLE_TRACE */
                PUSH_CONTENTS((ptr_t)current, mark_stack_top,
                              mark_stack_limit, current_p, exit1);
              }
            }
            descr <<= 1;
            current_p += sizeof(word);
          }
          continue;
        case GC_DS_PROC:
          mark_stack_top--;
#         ifdef ENABLE_TRACE
            if ((word)GC_trace_addr >= (word)current_p
                && GC_base(current_p) != 0
                && GC_base(current_p) == GC_base(GC_trace_addr)) {
              GC_log_printf("GC #%u: tracing from %p, proc descr %lu\n",
                            (unsigned)GC_gc_no, current_p,
                            (unsigned long)descr);
            }
#         endif /* ENABLE_TRACE */
          credit -= GC_PROC_BYTES;
          mark_stack_top = (*PROC(descr))((word *)current_p, mark_stack_top,
                                          mark_stack_limit, ENV(descr));
          continue;
        case GC_DS_PER_OBJECT:
          if ((signed_word)descr >= 0) {
            /* Descriptor is in the object. */
            descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
          } else {
            /* Descriptor is in type descriptor pointed to by first    */
            /* word in object.                                         */
            ptr_t type_descr = *(ptr_t *)current_p;
            /* type_descr is either a valid pointer to the descriptor  */
            /* structure, or this object was on a free list.  If it    */
            /* was anything but the last object on the free list, we   */
            /* will misinterpret the next object on the free list as   */
            /* the type descriptor, and get a 0 GC descriptor, which   */
            /* is ideal.  Unfortunately, we need to check for the last */
            /* object case explicitly.                                 */
            if (0 == type_descr) {
              /* Rarely executed.     */
              mark_stack_top--;
              continue;
            }
            if ((GC_word)(type_descr) >= (GC_word)GC_least_plausible_heap_addr
                && (GC_word)(type_descr)
                      <= (GC_word)GC_greatest_plausible_heap_addr) {
              /* type_descr looks like a pointer into the heap.        */
              /* It could still be the link pointer in a free list     */
              /* though.  That's not a problem as long as the offset   */
              /* of the actual descriptor in the pointed to object is  */
              /* within the same object.  In that case it will either  */
              /* point at the next free object in the list (if offset  */
              /* is 0) or be zeroed (which we check for below,         */
              /* descr == 0).  If the offset is larger than the        */
              /* objects in the block type_descr points to, it cannot  */
              /* be a proper pointer.                                  */
              word offset = ~(descr + (GC_INDIR_PER_OBJ_BIAS
                                       - GC_DS_PER_OBJECT - 1));
              hdr *hhdr;
              GET_HDR(type_descr, hhdr);
              if (NULL == hhdr || hhdr->hb_sz - sizeof(word) < offset) {
                mark_stack_top--;
                continue;
              }
            }
            descr = *(word *)(type_descr
                              - (descr + (GC_INDIR_PER_OBJ_BIAS
                                          - GC_DS_PER_OBJECT)));
          }
          if (0 == descr) {
              /* Can happen either because we generated a 0 descriptor */
              /* or we saw a pointer to a free object.                 */
              mark_stack_top--;
              continue;
          }
          goto retry;
      }
    } else /* Small object with length descriptor. */ {
      mark_stack_top--;
#     ifndef SMALL_CONFIG
        if (descr < sizeof(word))
          continue;
#     endif
      limit = current_p + (word)descr;
    }
#   ifdef ENABLE_TRACE
      if ((word)GC_trace_addr >= (word)current_p
          && (word)GC_trace_addr < (word)limit) {
        GC_log_printf("GC #%u: Tracing from %p, length is %lu\n",
                      (unsigned)GC_gc_no, current_p, (unsigned long)descr);
      }
#   endif /* ENABLE_TRACE */
    /* The simple case in which we're scanning a range. */
    GC_ASSERT(!((word)current_p & (ALIGNMENT-1)));
    credit -= limit - current_p;
    limit -= sizeof(word);
    {
#     define PREF_DIST 4

#     ifndef SMALL_CONFIG
        word deferred;

        /* Try to prefetch the next pointer to be examined ASAP.      */
        /* Empirically, this also seems to help slightly without      */
        /* prefetches, at least on linux/X86.  Presumably this loop   */
        /* ends up with less register pressure, and gcc thus ends up  */
        /* generating slightly better code.  Overall gcc code quality */
        /* for this loop is still not great.                          */
        for(;;) {
          PREFETCH(limit - PREF_DIST*CACHE_LINE_SIZE);
          GC_ASSERT((word)limit >= (word)current_p);
          deferred = *(word *)limit;
          FIXUP_POINTER(deferred);
          limit -= ALIGNMENT;
          if (deferred >= (word)least_ha && deferred < (word)greatest_ha) {
            PREFETCH((ptr_t)deferred);
            break;
          }
          if ((word)current_p > (word)limit) goto next_object;
          /* Unroll once, so we don't do too many of the prefetches */
          /* based on limit.                                        */
          deferred = *(word *)limit;
          FIXUP_POINTER(deferred);
          limit -= ALIGNMENT;
          if (deferred >= (word)least_ha && deferred < (word)greatest_ha) {
            PREFETCH((ptr_t)deferred);
            break;
          }
          if ((word)current_p > (word)limit) goto next_object;
        }
#     endif

      while ((word)current_p <= (word)limit) {
        /* Empirically, unrolling this loop doesn't help a lot. */
        /* Since PUSH_CONTENTS expands to a lot of code,        */
        /* we don't.                                            */
        current = *(word *)current_p;
        FIXUP_POINTER(current);
        PREFETCH(current_p + PREF_DIST*CACHE_LINE_SIZE);
        if (current >= (word)least_ha && current < (word)greatest_ha) {
          /* Prefetch the contents of the object we just pushed.  It's */
          /* likely we will need them soon.                            */
          PREFETCH((ptr_t)current);
#         ifdef ENABLE_TRACE
            if (GC_trace_addr == current_p) {
              GC_log_printf("GC #%u: considering(1) %p -> %p\n",
                            (unsigned)GC_gc_no, current_p, (ptr_t)current);
            }
#         endif /* ENABLE_TRACE */
          PUSH_CONTENTS((ptr_t)current, mark_stack_top,
                        mark_stack_limit, current_p, exit2);
        }
        current_p += ALIGNMENT;
      }

#     ifndef SMALL_CONFIG
        /* We still need to mark the entry we previously prefetched. */
        /* We already know that it passes the preliminary pointer    */
        /* validity test.                                            */
#       ifdef ENABLE_TRACE
          if (GC_trace_addr == current_p) {
            GC_log_printf("GC #%u: considering(2) %p -> %p\n",
                          (unsigned)GC_gc_no, current_p, (ptr_t)deferred);
          }
#       endif /* ENABLE_TRACE */
        PUSH_CONTENTS((ptr_t)deferred, mark_stack_top,
                      mark_stack_limit, current_p, exit4);
        next_object:;
#     endif
    }
  }
  return mark_stack_top;
}
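
/* A hedged sketch (not collector code) of how mark descriptors like   */
/* those decoded above are obtained by clients of the typed-allocation */
/* interface (gc_typed.h).  The 4-word object layout is a made-up      */
/* example.                                                            */
#if 0
  {
    /* Bit i (least significant first) set means word i of the object */
    /* may hold a pointer; here words 0 and 2 are pointers.           */
    GC_word bm[] = { 0x5 };
    GC_descr d = GC_make_descriptor(bm, 4);
    /* A plain "scan the first n bytes" descriptor, as handled by the */
    /* GC_DS_LENGTH case in GC_mark_from, is simply:                  */
    GC_descr len_d = (GC_descr)(4 * sizeof(GC_word)) | GC_DS_LENGTH;
  }
#endif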

#ifdef PARALLEL_MARK

STATIC GC_bool GC_help_wanted = FALSE;  /* Protected by mark lock.     */
STATIC unsigned GC_helper_count = 0;    /* Number of running helpers.  */
                                        /* Protected by mark lock.     */
STATIC unsigned GC_active_count = 0;    /* Number of active helpers.   */
                                        /* Protected by mark lock.     */
                                        /* May increase and decrease   */
                                        /* within each mark cycle.  But */
                                        /* once it returns to 0, it    */
                                        /* stays zero for the cycle.   */

GC_INNER word GC_mark_no = 0;

#define LOCAL_MARK_STACK_SIZE HBLKSIZE
        /* Under normal circumstances, this is big enough to guarantee */
        /* we don't overflow half of it in a single call to            */
        /* GC_mark_from.                                               */

/* Wait for all markers to finish initialization (i.e. store           */
/* marker_[b]sp, marker_mach_threads, GC_marker_Id).                   */
GC_INNER void GC_wait_for_markers_init(void)
{
    word count;

    if (GC_markers_m1 == 0)
      return;

    /* Reuse marker lock and builders count to synchronize     */
    /* marker threads startup.                                 */
    GC_acquire_mark_lock();
    GC_fl_builder_count += (word)GC_markers_m1;
    count = GC_fl_builder_count;
    GC_release_mark_lock();
    if (count != 0)
      GC_wait_for_reclaim();
}

/* Steal mark stack entries starting at mse low into mark stack local  */
/* until we either steal mse high, or we have max entries.             */
/* Return a pointer to the top of the local mark stack.                */
/* *next is replaced by a pointer to the next unscanned mark stack     */
/* entry.                                                              */
STATIC mse * GC_steal_mark_stack(mse * low, mse * high, mse * local,
                                 unsigned max, mse **next)
{
    mse *p;
    mse *top = local - 1;
    unsigned i = 0;

    GC_ASSERT((word)high >= (word)(low - 1)
              && (word)(high - low + 1) <= GC_mark_stack_size);
    for (p = low; (word)p <= (word)high && i <= max; ++p) {
      word descr = (word)AO_load(&p->mse_descr.ao);
      if (descr != 0) {
        /* Must be ordered after read of descr: */
        AO_store_release_write(&p->mse_descr.ao, 0);
        /* More than one thread may get this entry, but that's only */
        /* a minor performance problem.                             */
        ++top;
        top -> mse_descr.w = descr;
        top -> mse_start = p -> mse_start;
        GC_ASSERT((top->mse_descr.w & GC_DS_TAGS) != GC_DS_LENGTH ||
                  top->mse_descr.w < (word)GC_greatest_plausible_heap_addr
                                     - (word)GC_least_plausible_heap_addr);
        /* If this is a big object, count it as */
        /* size/256 + 1 objects.                */
        ++i;
        if ((descr & GC_DS_TAGS) == GC_DS_LENGTH) i += (int)(descr >> 8);
      }
    }
    *next = p;
    return top;
}

/* Copy back a local mark stack.        */
/* low and high are inclusive bounds.   */
STATIC void GC_return_mark_stack(mse * low, mse * high)
{
    mse * my_top;
    mse * my_start;
    size_t stack_size;

    if ((word)high < (word)low) return;
    stack_size = high - low + 1;
    GC_acquire_mark_lock();
    my_top = GC_mark_stack_top; /* Concurrent modification impossible. */
    my_start = my_top + 1;
    if ((word)(my_start - GC_mark_stack + stack_size)
        > (word)GC_mark_stack_size) {
      GC_COND_LOG_PRINTF("No room to copy back mark stack\n");
      GC_mark_state = MS_INVALID;
      GC_mark_stack_too_small = TRUE;
      /* We drop the local mark stack.  We'll fix things later. */
    } else {
      BCOPY(low, my_start, stack_size * sizeof(mse));
      GC_ASSERT((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
                == my_top);
      AO_store_release_write((volatile AO_t *)(&GC_mark_stack_top),
                             (AO_t)(my_top + stack_size));
      /* Ensures visibility of previously written stack contents. */
    }
    GC_release_mark_lock();
    GC_notify_all_marker();
}

/* Mark from the local mark stack.              */
/* On return, the local mark stack is empty.    */
/* But this may be achieved by copying the      */
/* local mark stack back into the global one.   */
STATIC void GC_do_local_mark(mse *local_mark_stack, mse *local_top)
{
    unsigned n;
#   define N_LOCAL_ITERS 1

#   ifdef GC_ASSERTIONS
      /* Make sure we don't hold mark lock. */
      GC_acquire_mark_lock();
      GC_release_mark_lock();
#   endif
    for (;;) {
      for (n = 0; n < N_LOCAL_ITERS; ++n) {
        local_top = GC_mark_from(local_top, local_mark_stack,
                                 local_mark_stack + LOCAL_MARK_STACK_SIZE);
        if ((word)local_top < (word)local_mark_stack) return;
        if ((word)(local_top - local_mark_stack)
            >= LOCAL_MARK_STACK_SIZE / 2) {
          GC_return_mark_stack(local_mark_stack, local_top);
          return;
        }
      }
      if ((word)AO_load((volatile AO_t *)&GC_mark_stack_top)
          < (word)AO_load(&GC_first_nonempty)
          && GC_active_count < GC_helper_count
          && (word)local_top > (word)(local_mark_stack + 1)) {
        /* Try to share the load, since the main stack is empty,    */
        /* and helper threads are waiting for a refill.             */
        /* The entries near the bottom of the stack are likely      */
        /* to require more work.  Thus we return those, even though */
        /* it's harder.                                             */
        mse * new_bottom = local_mark_stack
                           + (local_top - local_mark_stack)/2;
        GC_ASSERT((word)new_bottom > (word)local_mark_stack
                  && (word)new_bottom < (word)local_top);
        GC_return_mark_stack(local_mark_stack, new_bottom - 1);
        memmove(local_mark_stack, new_bottom,
                (local_top - new_bottom + 1) * sizeof(mse));
        local_top -= (new_bottom - local_mark_stack);
      }
    }
}

#define ENTRIES_TO_GET 5

/* Mark using the local mark stack until the global mark stack is empty */
/* and there are no active workers.  Update GC_first_nonempty to reflect */
/* progress.                                                             */
/* Caller does not hold mark lock.                                       */
/* Caller has already incremented GC_helper_count.  We decrement it,     */
/* and maintain GC_active_count.                                         */
STATIC void GC_mark_local(mse *local_mark_stack, int id)
{
    mse * my_first_nonempty;

    GC_acquire_mark_lock();
    GC_active_count++;
    my_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
    GC_ASSERT((word)GC_mark_stack <= (word)my_first_nonempty);
    GC_ASSERT((word)my_first_nonempty
        <= (word)AO_load((volatile AO_t *)&GC_mark_stack_top) + sizeof(mse));
    GC_VERBOSE_LOG_PRINTF("Starting mark helper %lu\n", (unsigned long)id);
    GC_release_mark_lock();
    for (;;) {
      size_t n_on_stack;
      unsigned n_to_get;
      mse * my_top;
      mse * local_top;
      mse * global_first_nonempty = (mse *)AO_load(&GC_first_nonempty);

      GC_ASSERT((word)my_first_nonempty >= (word)GC_mark_stack &&
                (word)my_first_nonempty <=
                      (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
                      + sizeof(mse));
      GC_ASSERT((word)global_first_nonempty >= (word)GC_mark_stack &&
                (word)global_first_nonempty <=
                      (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
                      + sizeof(mse));
      if ((word)my_first_nonempty < (word)global_first_nonempty) {
        my_first_nonempty = global_first_nonempty;
      } else if ((word)global_first_nonempty < (word)my_first_nonempty) {
        AO_compare_and_swap(&GC_first_nonempty,
                            (AO_t)global_first_nonempty,
                            (AO_t)my_first_nonempty);
        /* If this fails, we just go ahead, without updating    */
        /* GC_first_nonempty.                                   */
      }
      /* Perhaps we should also update GC_first_nonempty, if it */
      /* is less.  But that would require using atomic updates. */
      my_top = (mse *)AO_load_acquire((volatile AO_t *)(&GC_mark_stack_top));
      n_on_stack = my_top - my_first_nonempty + 1;
      if (0 == n_on_stack) {
        GC_acquire_mark_lock();
        my_top = GC_mark_stack_top;
                /* Asynchronous modification impossible here,   */
                /* since we hold mark lock.                     */
        n_on_stack = my_top - my_first_nonempty + 1;
        if (0 == n_on_stack) {
          GC_active_count--;
          GC_ASSERT(GC_active_count <= GC_helper_count);
          /* Other markers may redeposit objects  */
          /* on the stack.                        */
          if (0 == GC_active_count) GC_notify_all_marker();
          while (GC_active_count > 0
                 && (word)AO_load(&GC_first_nonempty)
                    > (word)GC_mark_stack_top) {
            /* We will be notified if either GC_active_count    */
            /* reaches zero, or if more objects are pushed on   */
            /* the global mark stack.                           */
            GC_wait_marker();
          }
          if (GC_active_count == 0
              && (word)AO_load(&GC_first_nonempty)
                 > (word)GC_mark_stack_top) {
            GC_bool need_to_notify = FALSE;
            /* The above conditions can't be falsified while we */
            /* hold the mark lock, since neither                */
            /* GC_active_count nor GC_mark_stack_top can        */
            /* change.  GC_first_nonempty can only be           */
            /* incremented asynchronously.  Thus we know that   */
            /* both conditions actually held simultaneously.    */
            GC_helper_count--;
            if (0 == GC_helper_count) need_to_notify = TRUE;
            GC_VERBOSE_LOG_PRINTF("Finished mark helper %lu\n",
                                  (unsigned long)id);
            GC_release_mark_lock();
            if (need_to_notify) GC_notify_all_marker();
            return;
          }
          /* else there's something on the stack again, or      */
          /* another helper may push something.                 */
          GC_active_count++;
          GC_ASSERT(GC_active_count > 0);
          GC_release_mark_lock();
          continue;
        } else {
          GC_release_mark_lock();
        }
      }
      n_to_get = ENTRIES_TO_GET;
      if (n_on_stack < 2 * ENTRIES_TO_GET) n_to_get = 1;
      local_top = GC_steal_mark_stack(my_first_nonempty, my_top,
                                      local_mark_stack, n_to_get,
                                      &my_first_nonempty);
      GC_ASSERT((word)my_first_nonempty >= (word)GC_mark_stack &&
                (word)my_first_nonempty <=
                      (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
                      + sizeof(mse));
      GC_do_local_mark(local_mark_stack, local_top);
    }
}

GC_INNER mse GC_local_mark_stacks[MAX_MARKERS+1][LOCAL_MARK_STACK_SIZE];

/* Perform parallel mark.                       */
/* We hold the GC lock, not the mark lock.      */
/* Currently runs until the mark stack is       */
/* empty.                                       */
STATIC void GC_do_parallel_mark(void)
{
    mse *local_mark_stack = &(GC_local_mark_stacks[0][0]);
                /* Note: local_mark_stack is quite big (up to 128 KiB). */

    GC_acquire_mark_lock();
    GC_ASSERT(I_HOLD_LOCK());
    /* This could be a GC_ASSERT, but it seems safer to keep it on */
    /* all the time, especially since it's cheap.                  */
    if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
      ABORT("Tried to start parallel mark in bad state");
    GC_VERBOSE_LOG_PRINTF("Starting marking for mark phase number %lu\n",
                          (unsigned long)GC_mark_no);
    GC_first_nonempty = (AO_t)GC_mark_stack;
    GC_active_count = 0;
    GC_helper_count = 1;
    GC_help_wanted = TRUE;
    GC_release_mark_lock();
    GC_notify_all_marker();
        /* Wake up potential helpers.   */
    GC_mark_local(local_mark_stack, 0);
    GC_acquire_mark_lock();
    GC_help_wanted = FALSE;
    /* Done; clean up.  */
    while (GC_helper_count > 0) {
      GC_wait_marker();
    }
    /* GC_helper_count cannot be incremented while GC_help_wanted == FALSE */
    GC_VERBOSE_LOG_PRINTF("Finished marking for mark phase number %lu\n",
                          (unsigned long)GC_mark_no);
    GC_mark_no++;
    GC_release_mark_lock();
    GC_notify_all_marker();
}

/* Try to help out the marker, if it's running.         */
/* We do not hold the GC lock, but the requestor does.  */
GC_INNER void GC_help_marker(word my_mark_no)
{
    unsigned my_id;
    mse *local_mark_stack;
                /* Note: local_mark_stack is quite big (up to 128 KiB). */

    if (!GC_parallel) return;

    GC_acquire_mark_lock();
    while (GC_mark_no < my_mark_no
           || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
      GC_wait_marker();
    }
    my_id = GC_helper_count;
    local_mark_stack = &(GC_local_mark_stacks[my_id][0]);
    if (GC_mark_no != my_mark_no || my_id > (unsigned)GC_markers_m1) {
      /* Second test is useful only if original threads can also */
      /* act as helpers.  Under Linux they can't.                */
      GC_release_mark_lock();
      return;
    }
    GC_helper_count = my_id + 1;
    GC_release_mark_lock();
    GC_mark_local(local_mark_stack, my_id);
    /* GC_mark_local decrements GC_helper_count. */
}

#endif /* PARALLEL_MARK */

/* Allocate or reallocate space for mark stack of size n entries.  */
/* May silently fail.                                              */
static void alloc_mark_stack(size_t n)
{
    mse * new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
#   ifdef GWW_VDB
      /* Don't recycle a stack segment obtained with the wrong flags. */
      /* Win32 GetWriteWatch requires the right kind of memory.       */
      static GC_bool GC_incremental_at_stack_alloc = FALSE;
      GC_bool recycle_old = (!GC_incremental || GC_incremental_at_stack_alloc);

      GC_incremental_at_stack_alloc = GC_incremental;
#   else
#     define recycle_old TRUE
#   endif

    GC_mark_stack_too_small = FALSE;
    if (GC_mark_stack_size != 0) {
      if (new_stack != 0) {
        if (recycle_old) {
          /* Recycle old space. */
          size_t page_offset = (word)GC_mark_stack & (GC_page_size - 1);
          size_t size = GC_mark_stack_size * sizeof(struct GC_ms_entry);
          size_t displ = 0;

          if (0 != page_offset) displ = GC_page_size - page_offset;
          size = (size - displ) & ~(GC_page_size - 1);
          if (size > 0) {
            GC_add_to_heap((struct hblk *)
                                ((word)GC_mark_stack + displ), (word)size);
          }
        }
        GC_mark_stack = new_stack;
        GC_mark_stack_size = n;
        /* FIXME: Do we need some way to reset GC_mark_stack_size? */
        GC_mark_stack_limit = new_stack + n;
        GC_COND_LOG_PRINTF("Grew mark stack to %lu frames\n",
                           (unsigned long)GC_mark_stack_size);
      } else {
        WARN("Failed to grow mark stack to %" WARN_PRIdPTR " frames\n", n);
      }
    } else {
      if (new_stack == 0) {
        GC_err_printf("No space for mark stack\n");
        EXIT();
      }
      GC_mark_stack = new_stack;
      GC_mark_stack_size = n;
      GC_mark_stack_limit = new_stack + n;
    }
    GC_mark_stack_top = GC_mark_stack-1;
}

GC_INNER void GC_mark_init(void)
{
    alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
#   ifdef PARALLEL_MARK
      GC_exclude_static_roots_inner(GC_local_mark_stacks,
                (char *)GC_local_mark_stacks + sizeof(GC_local_mark_stacks));
#   endif
}

/*
 * Push all locations between b and t onto the mark stack.
 * b is the first location to be checked.  t is one past the last
 * location to be checked.
 * Should only be used if there is no possibility of mark stack
 * overflow.
 */
GC_API void GC_CALL GC_push_all(char *bottom, char *top)
{
    register word length;

    bottom = (char *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (char *)(((word) top) & ~(ALIGNMENT-1));
    if ((word)bottom >= (word)top) return;

    GC_mark_stack_top++;
    if ((word)GC_mark_stack_top >= (word)GC_mark_stack_limit) {
      ABORT("Unexpected mark stack overflow");
    }
    length = top - bottom;
#   if GC_DS_TAGS > ALIGNMENT - 1
      length += GC_DS_TAGS;
      length &= ~GC_DS_TAGS;
#   endif
    GC_mark_stack_top -> mse_start = bottom;
    GC_mark_stack_top -> mse_descr.w = length;
}
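
/* A hedged usage sketch (not collector code): a client with memory    */
/* that the collector does not scan by default can push it explicitly. */
/* The region and callback below are hypothetical; the usual place to  */
/* do this is a "push other roots" hook.                               */
#if 0
  static char my_region[4096]; /* assumed to contain pointers to GC heap */

  static void my_push_other_roots(void)
  {
    GC_push_all(my_region, my_region + sizeof(my_region));
  }
#endif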

#ifndef GC_DISABLE_INCREMENTAL

  /* Analogous to the above, but push only those pages h with          */
  /* dirty_fn(h) != 0.  We use GC_push_all to actually push the block. */
  /* Used both to selectively push dirty pages, or to push a block in  */
  /* piecemeal fashion, to allow for more marking concurrency.         */
  /* Will not overflow mark stack if GC_push_all pushes a small fixed  */
  /* number of entries.  (This is invoked only if GC_push_all pushes   */
  /* a single entry, or if it marks each object before pushing it,     */
  /* thus ensuring progress in the event of a stack overflow.)         */
  STATIC void GC_push_selected(ptr_t bottom, ptr_t top,
                               GC_bool (*dirty_fn)(struct hblk *))
  {
    struct hblk * h;

    bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));
    if ((word)bottom >= (word)top) return;

    h = HBLKPTR(bottom + HBLKSIZE);
    if ((word)top <= (word)h) {
      if ((*dirty_fn)(h-1)) {
        GC_push_all(bottom, top);
      }
      return;
    }
    if ((*dirty_fn)(h-1)) {
      GC_push_all(bottom, (ptr_t)h);
    }

    while ((word)(h+1) <= (word)top) {
      if ((*dirty_fn)(h)) {
        if ((word)(GC_mark_stack_top - GC_mark_stack)
            > 3 * GC_mark_stack_size / 4) {
          /* Danger of mark stack overflow. */
          GC_push_all((ptr_t)h, top);
          return;
        } else {
          GC_push_all((ptr_t)h, (ptr_t)(h+1));
        }
      }
      h++;
    }

    if ((ptr_t)h != top && (*dirty_fn)(h)) {
      GC_push_all((ptr_t)h, top);
    }
    if ((word)GC_mark_stack_top >= (word)GC_mark_stack_limit) {
      ABORT("Unexpected mark stack overflow");
    }
  }

  GC_API void GC_CALL GC_push_conditional(char *bottom, char *top, int all)
  {
    if (!all) {
      GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_dirty);
    } else {
#     ifdef PROC_VDB
        if (GC_dirty_maintained) {
          /* Pages that were never dirtied cannot contain pointers. */
          GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_ever_dirty);
        } else
#     endif
      /* else */ {
        GC_push_all(bottom, top);
      }
    }
  }
#else
  GC_API void GC_CALL GC_push_conditional(char *bottom, char *top,
                                          int all GC_ATTR_UNUSED)
  {
    GC_push_all(bottom, top);
  }
#endif /* GC_DISABLE_INCREMENTAL */

#if defined(MSWIN32) || defined(MSWINCE)
  void __cdecl GC_push_one(word p)
#else
  void GC_push_one(word p)
#endif
{
    GC_PUSH_ONE_STACK(p, MARKED_FROM_REGISTER);
}

GC_API struct GC_ms_entry * GC_CALL GC_mark_and_push(void *obj,
                                                mse *mark_stack_ptr,
                                                mse *mark_stack_limit,
                                                void ** src GC_ATTR_UNUSED)
{
    hdr * hhdr;

    PREFETCH(obj);
    GET_HDR(obj, hhdr);
    if ((EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)
         && (!GC_all_interior_pointers
             || NULL == (hhdr = GC_find_header(GC_base(obj)))))
        || EXPECT(HBLK_IS_FREE(hhdr), FALSE)) {
      GC_ADD_TO_BLACK_LIST_NORMAL(obj, (ptr_t)src);
      return mark_stack_ptr;
    }

    PUSH_CONTENTS_HDR(obj, mark_stack_ptr /* modified */, mark_stack_limit,
                      (ptr_t)src, was_marked, hhdr, TRUE);
 was_marked:
    return mark_stack_ptr;
}
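
/* A hedged sketch (not collector code) of a client-defined mark       */
/* procedure, the usual caller of GC_mark_and_push above; such a       */
/* procedure is registered via GC_new_proc() and referenced from       */
/* GC_DS_PROC descriptors (see gc_mark.h).  The object layout is a     */
/* made-up example.                                                    */
#if 0
  struct my_obj { void *left; GC_word tag; void *right; };

  static struct GC_ms_entry *my_mark_proc(GC_word *addr,
                                        struct GC_ms_entry *mark_stack_ptr,
                                        struct GC_ms_entry *mark_stack_limit,
                                        GC_word env GC_ATTR_UNUSED)
  {
    struct my_obj *obj = (struct my_obj *)addr;

    /* Push only the fields known to hold pointers. */
    mark_stack_ptr = GC_MARK_AND_PUSH(obj->left, mark_stack_ptr,
                                      mark_stack_limit, (void **)&obj->left);
    mark_stack_ptr = GC_MARK_AND_PUSH(obj->right, mark_stack_ptr,
                                      mark_stack_limit, (void **)&obj->right);
    return mark_stack_ptr;
  }
#endif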

#if defined(MANUAL_VDB) && defined(THREADS)
  void GC_dirty(ptr_t p);
#endif

/* Mark and push (i.e. gray) a single object p onto the main           */
/* mark stack.  Consider p to be valid if it is an interior            */
/* pointer.                                                            */
/* The object p has passed a preliminary pointer validity              */
/* test, but we do not definitely know whether it is valid.            */
/* Mark bits are NOT atomically updated.  Thus this must be the        */
/* only thread setting them.                                           */
# if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
    GC_INNER void GC_mark_and_push_stack(ptr_t p, ptr_t source)
# else
    GC_INNER void GC_mark_and_push_stack(ptr_t p)
#   define source ((ptr_t)0)
# endif
{
    hdr * hhdr;
    ptr_t r = p;

    PREFETCH(p);
    GET_HDR(p, hhdr);
    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)) {
      if (hhdr != 0) {
        r = GC_base(p);
        hhdr = HDR(r);
      }
      if (hhdr == 0) {
        GC_ADD_TO_BLACK_LIST_STACK(p, source);
        return;
      }
    }
    if (EXPECT(HBLK_IS_FREE(hhdr), FALSE)) {
      GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
      return;
    }
#   if defined(MANUAL_VDB) && defined(THREADS)
      /* Pointer is on the stack.  We may have dirtied the object */
      /* it points to, but have not yet called GC_dirty().        */
      GC_dirty(p); /* Implicitly affects entire object. */
#   endif
    PUSH_CONTENTS_HDR(r, GC_mark_stack_top, GC_mark_stack_limit,
                      source, mark_and_push_exit, hhdr, FALSE);
 mark_and_push_exit: ;
    /* We silently ignore pointers to near the end of a block, */
    /* which is very mildly suboptimal.                        */
    /* FIXME: We should probably add a header word to address  */
    /* this.                                                   */
}
# undef source

# ifdef TRACE_BUF

# define TRACE_ENTRIES 1000

struct trace_entry {
    char * kind;
    word gc_no;
    word bytes_allocd;
    word arg1;
    word arg2;
} GC_trace_buf[TRACE_ENTRIES];

int GC_trace_buf_ptr = 0;

void GC_add_trace_entry(char *kind, word arg1, word arg2)
{
    GC_trace_buf[GC_trace_buf_ptr].kind = kind;
    GC_trace_buf[GC_trace_buf_ptr].gc_no = GC_gc_no;
    GC_trace_buf[GC_trace_buf_ptr].bytes_allocd = GC_bytes_allocd;
    GC_trace_buf[GC_trace_buf_ptr].arg1 = arg1 ^ 0x80000000;
    GC_trace_buf[GC_trace_buf_ptr].arg2 = arg2 ^ 0x80000000;
    GC_trace_buf_ptr++;
    if (GC_trace_buf_ptr >= TRACE_ENTRIES) GC_trace_buf_ptr = 0;
}

void GC_print_trace_inner(word gc_no)
{
    int i;
    struct trace_entry *p;

    for (i = GC_trace_buf_ptr-1; i != GC_trace_buf_ptr; i--) {
      if (i < 0) i = TRACE_ENTRIES-1;
      p = GC_trace_buf + i;
      if (p -> gc_no < gc_no || p -> kind == 0) {
        return;
      }
      GC_printf("Trace:%s (gc:%u, bytes:%lu) 0x%lX, 0x%lX\n",
                p -> kind, (unsigned)p -> gc_no,
                (unsigned long)p -> bytes_allocd,
                (long)p->arg1 ^ 0x80000000L, (long)p->arg2 ^ 0x80000000L);
    }
    GC_printf("Trace incomplete\n");
}

void GC_print_trace(word gc_no)
{
    DCL_LOCK_STATE;

    LOCK();
    GC_print_trace_inner(gc_no);
    UNLOCK();
}

# endif /* TRACE_BUF */
1534
1535 /*
1536 * A version of GC_push_all that treats all interior pointers as valid
1537 * and scans the entire region immediately, in case the contents
1538 * change.
1539 */
GC_push_all_eager(ptr_t bottom,ptr_t top)1540 GC_INNER void GC_push_all_eager(ptr_t bottom, ptr_t top)
1541 {
1542 word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
1543 word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
1544 register word *p;
1545 register word q;
1546 register word *lim;
1547 register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1548 register ptr_t least_ha = GC_least_plausible_heap_addr;
1549 # define GC_greatest_plausible_heap_addr greatest_ha
1550 # define GC_least_plausible_heap_addr least_ha
1551
1552 if (top == 0) return;
1553 /* check all pointers in range and push if they appear */
1554 /* to be valid. */
1555 lim = t - 1 /* longword */;
1556 for (p = b; (word)p <= (word)lim;
1557 p = (word *)(((ptr_t)p) + ALIGNMENT)) {
1558 q = *p;
1559 GC_PUSH_ONE_STACK(q, p);
1560 }
1561 # undef GC_greatest_plausible_heap_addr
1562 # undef GC_least_plausible_heap_addr
1563 }
1564
GC_INNER void GC_push_all_stack(ptr_t bottom, ptr_t top)
{
# if defined(THREADS) && defined(MPROTECT_VDB)
    GC_push_all_eager(bottom, top);
# else
    if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
      GC_push_all(bottom, top);
    } else {
      GC_push_all_eager(bottom, top);
    }
# endif
}

#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
    defined(MARK_BIT_PER_GRANULE)
# if GC_GRANULE_WORDS == 1
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      do { \
        word qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
      } while (0)
# elif GC_GRANULE_WORDS == 2
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      do { \
        word qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
        qcontents = (q)[1]; \
        GC_PUSH_ONE_HEAP(qcontents, (q)+1, GC_mark_stack_top); \
      } while (0)
# elif GC_GRANULE_WORDS == 4
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      do { \
        word qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
        qcontents = (q)[1]; \
        GC_PUSH_ONE_HEAP(qcontents, (q)+1, GC_mark_stack_top); \
        qcontents = (q)[2]; \
        GC_PUSH_ONE_HEAP(qcontents, (q)+2, GC_mark_stack_top); \
        qcontents = (q)[3]; \
        GC_PUSH_ONE_HEAP(qcontents, (q)+3, GC_mark_stack_top); \
      } while (0)
# endif
#endif /* !USE_MARK_BYTES && MARK_BIT_PER_GRANULE */
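
/* PUSH_GRANULE(q) examines each of the GC_GRANULE_WORDS words of the */
/* granule at q and pushes those that look like heap pointers.  With  */
/* GC_GRANULE_WORDS == 2, for instance, the expansion checks (q)[0]   */
/* and (q)[1], passing each value together with its source address to */
/* GC_PUSH_ONE_HEAP so the target can be marked and pushed.           */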

#ifdef USE_PUSH_MARKED_ACCELERATORS
/* Push all objects reachable from marked objects in the given block */
/* containing objects of size 1 granule.                             */
STATIC void GC_push_marked1(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;
    word *q;
    word mark_word;

    /* Allow registers to be used for some frequently accessed */
    /* global variables.  Otherwise aliasing issues are likely */
    /* to prevent that.                                        */
    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
    ptr_t least_ha = GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

# undef GC_mark_stack_top
# undef GC_mark_stack_limit
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
# define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
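    /* Each bit of mark_word corresponds to one granule (here one     */
    /* object), least significant bit first; e.g. a mark_word of 0x5  */
    /* means the first and third granules of the current run are      */
    /* marked.  The inner loop peels off one bit per iteration,       */
    /* advancing q by one granule each time.                          */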
    while ((word)p < (word)plim) {
        mark_word = *mark_word_addr++;
        q = p;
        while(mark_word != 0) {
          if (mark_word & 1) {
            PUSH_GRANULE(q);
          }
          q += GC_GRANULE_WORDS;
          mark_word >>= 1;
        }
        p += WORDSZ*GC_GRANULE_WORDS;
    }

# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
# undef GC_mark_stack_top
# undef GC_mark_stack_limit
# define GC_mark_stack_limit GC_arrays._mark_stack_limit
# define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}

#ifndef UNALIGNED_PTRS

/* Push all objects reachable from marked objects in the given block */
/* of size 2 (granules) objects.                                     */
STATIC void GC_push_marked2(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;
    word *q;
    word mark_word;

    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
    ptr_t least_ha = GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

# undef GC_mark_stack_top
# undef GC_mark_stack_limit
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
# define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
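    /* Each object spans two granules and hence two mark bits, of     */
    /* which only the first is set for a live object; thus the shift  */
    /* by 2 below and the two PUSH_GRANULE calls per marked object.   */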
    while ((word)p < (word)plim) {
        mark_word = *mark_word_addr++;
        q = p;
        while(mark_word != 0) {
          if (mark_word & 1) {
            PUSH_GRANULE(q);
            PUSH_GRANULE(q + GC_GRANULE_WORDS);
          }
          q += 2 * GC_GRANULE_WORDS;
          mark_word >>= 2;
        }
        p += WORDSZ*GC_GRANULE_WORDS;
    }

# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
# undef GC_mark_stack_top
# undef GC_mark_stack_limit
# define GC_mark_stack_limit GC_arrays._mark_stack_limit
# define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}

# if GC_GRANULE_WORDS < 4
/* Push all objects reachable from marked objects in the given block */
/* of size 4 (granules) objects.                                     */
/* There is a risk of mark stack overflow here.  But we handle that. */
/* And only objects that are not already marked get pushed, so       */
/* overflow is not very likely.                                      */
STATIC void GC_push_marked4(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;
    word *q;
    word mark_word;

    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
    ptr_t least_ha = GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

# undef GC_mark_stack_top
# undef GC_mark_stack_limit
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
# define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
    while ((word)p < (word)plim) {
        mark_word = *mark_word_addr++;
        q = p;
        while(mark_word != 0) {
          if (mark_word & 1) {
            PUSH_GRANULE(q);
            PUSH_GRANULE(q + GC_GRANULE_WORDS);
            PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
            PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
          }
          q += 4 * GC_GRANULE_WORDS;
          mark_word >>= 4;
        }
        p += WORDSZ*GC_GRANULE_WORDS;
    }
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
# undef GC_mark_stack_top
# undef GC_mark_stack_limit
# define GC_mark_stack_limit GC_arrays._mark_stack_limit
# define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}

#endif /* GC_GRANULE_WORDS < 4 */

#endif /* UNALIGNED_PTRS */

#endif /* USE_PUSH_MARKED_ACCELERATORS */

/* Push all objects reachable from marked objects in the given block. */
STATIC void GC_push_marked(struct hblk *h, hdr *hhdr)
{
    size_t sz = hhdr -> hb_sz;
    word descr = hhdr -> hb_descr;
    ptr_t p;
    word bit_no;
    ptr_t lim;
    mse * GC_mark_stack_top_reg;
    mse * mark_stack_limit = GC_mark_stack_limit;

    /* Some quick shortcuts: */
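    /* A descriptor of (0 | GC_DS_LENGTH) denotes a pointer-free      */
    /* object kind, so there is nothing to trace; an empty block      */
    /* contains no marked objects to push from.                       */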
    if ((/* 0 | */ GC_DS_LENGTH) == descr) return;
    if (GC_block_empty(hhdr)/* nothing marked */) return;
    GC_n_rescuing_pages++;
    GC_objects_are_marked = TRUE;
    if (sz > MAXOBJBYTES) {
        lim = h -> hb_body;
    } else {
        lim = (h + 1)->hb_body - sz;
    }
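    /* lim is the start address of the last object that begins in     */
    /* this block; for a large (multi-block) object the single object */
    /* at h -> hb_body is the only candidate.                         */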

    switch(BYTES_TO_GRANULES(sz)) {
# if defined(USE_PUSH_MARKED_ACCELERATORS)
    case 1:
      GC_push_marked1(h, hhdr);
      break;
#   if !defined(UNALIGNED_PTRS)
    case 2:
      GC_push_marked2(h, hhdr);
      break;
#     if GC_GRANULE_WORDS < 4
    case 4:
      GC_push_marked4(h, hhdr);
      break;
#     endif
#   endif
# endif
    default:
      GC_mark_stack_top_reg = GC_mark_stack_top;
      for (p = h -> hb_body, bit_no = 0; (word)p <= (word)lim;
           p += sz, bit_no += MARK_BIT_OFFSET(sz)) {
        if (mark_bit_from_hdr(hhdr, bit_no)) {
          /* Mark from fields inside the object */
          PUSH_OBJ(p, hhdr, GC_mark_stack_top_reg, mark_stack_limit);
        }
      }
      GC_mark_stack_top = GC_mark_stack_top_reg;
    }
}

#ifdef ENABLE_DISCLAIM
/* Unconditionally mark from all objects which have not been          */
/* reclaimed.  This is useful in order to retain pointers which are   */
/* reachable from the disclaim notifiers.                             */
/*                                                                    */
/* To determine whether an object has been reclaimed, we require that */
/* any live object has a non-zero value in one of the two lowest bits */
/* of its first word.  A reclaimed object, on the other hand, is a    */
/* member of a free-list, and thus contains a word-aligned            */
/* next-pointer as its first word.                                    */
STATIC void GC_push_unconditionally(struct hblk *h, hdr *hhdr)
{
    size_t sz = hhdr -> hb_sz;
    word descr = hhdr -> hb_descr;
    ptr_t p;
    ptr_t lim;
    mse * GC_mark_stack_top_reg;
    mse * mark_stack_limit = GC_mark_stack_limit;

    if ((/* 0 | */ GC_DS_LENGTH) == descr)
        return;

    GC_n_rescuing_pages++;
    GC_objects_are_marked = TRUE;
    if (sz > MAXOBJBYTES)
        lim = h -> hb_body;
    else
        lim = (h + 1)->hb_body - sz;

    GC_mark_stack_top_reg = GC_mark_stack_top;
    for (p = h -> hb_body; (word)p <= (word)lim; p += sz)
        if ((*(GC_word *)p & 0x3) != 0)
            PUSH_OBJ(p, hhdr, GC_mark_stack_top_reg, mark_stack_limit);
    GC_mark_stack_top = GC_mark_stack_top_reg;
}
#endif /* ENABLE_DISCLAIM */

#ifndef GC_DISABLE_INCREMENTAL
/* Test whether any page in the given block is dirty. */
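/* A block holding an object larger than MAXOBJBYTES spans several    */
/* HBLKSIZE-sized pages, so each page of the block must be queried    */
/* individually.                                                      */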
STATIC GC_bool GC_block_was_dirty(struct hblk *h, hdr *hhdr)
{
    size_t sz = hhdr -> hb_sz;

    if (sz <= MAXOBJBYTES) {
      return(GC_page_was_dirty(h));
    } else {
      ptr_t p = (ptr_t)h;
      while ((word)p < (word)h + sz) {
        if (GC_page_was_dirty((struct hblk *)p)) return(TRUE);
        p += HBLKSIZE;
      }
      return(FALSE);
    }
}
#endif /* !GC_DISABLE_INCREMENTAL */

/* Similar to GC_push_marked, but skip over unallocated blocks */
/* and return address of next plausible block.                 */
STATIC struct hblk * GC_push_next_marked(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr), FALSE)) {
      h = GC_next_used_block(h);
      if (h == 0) return(0);
      hhdr = GC_find_header((ptr_t)h);
    }
    GC_push_marked(h, hhdr);
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}

#ifndef GC_DISABLE_INCREMENTAL
/* Identical to above, but mark only from dirty pages. */
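/* For stubborn objects (if configured) the block must both contain   */
/* an explicitly registered change (GC_page_was_changed) and be       */
/* dirty; for all other object kinds a dirty block suffices.          */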
STATIC struct hblk * GC_push_next_marked_dirty(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    if (!GC_dirty_maintained) ABORT("Dirty bits not set up");
    for (;;) {
        if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr)
                   || HBLK_IS_FREE(hhdr), FALSE)) {
          h = GC_next_used_block(h);
          if (h == 0) return(0);
          hhdr = GC_find_header((ptr_t)h);
        }
# ifdef STUBBORN_ALLOC
        if (hhdr -> hb_obj_kind == STUBBORN) {
          if (GC_page_was_changed(h) && GC_block_was_dirty(h, hhdr)) {
            break;
          }
        } else {
          if (GC_block_was_dirty(h, hhdr)) break;
        }
# else
        if (GC_block_was_dirty(h, hhdr)) break;
# endif
        h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
        hhdr = HDR(h);
    }
    GC_push_marked(h, hhdr);
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}
#endif /* !GC_DISABLE_INCREMENTAL */

/* Similar to above, but for uncollectible pages.  Needed since we */
/* do not clear marks for such pages, even for full collections.   */
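/* With ENABLE_DISCLAIM, blocks flagged MARK_UNCONDITIONALLY are also */
/* pushed here (via GC_push_unconditionally), since their live        */
/* objects must remain reachable regardless of mark bits.             */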
STATIC struct hblk * GC_push_next_marked_uncollectable(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    for (;;) {
        if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr)
                   || HBLK_IS_FREE(hhdr), FALSE)) {
          h = GC_next_used_block(h);
          if (h == 0) return(0);
          hhdr = GC_find_header((ptr_t)h);
        }
        if (hhdr -> hb_obj_kind == UNCOLLECTABLE) {
            GC_push_marked(h, hhdr);
            break;
        }
# ifdef ENABLE_DISCLAIM
        if ((hhdr -> hb_flags & MARK_UNCONDITIONALLY) != 0) {
            GC_push_unconditionally(h, hhdr);
            break;
        }
# endif
        h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
        hhdr = HDR(h);
    }
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}
