/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 */

#if defined(__MINGW32__) && !defined(__MINGW_EXCPT_DEFINE_PSDK) \
    && defined(__i386__) /* cannot use macros from gcconfig.h */
  /* Otherwise EXCEPTION_REGISTRATION type declaration from winnt.h     */
  /* might be used.  That declaration has "handler" callback with NTAPI */
  /* attribute.  The proper type (with "handler" field compatible with  */
  /* GC mark_ex_handler) is declared in excpt.h.  The given macro is    */
  /* defined before any system header include.                          */
# define __MINGW_EXCPT_DEFINE_PSDK 1
#endif

#include "private/gc_pmark.h"

#include <stdio.h>

#if defined(MSWIN32) && defined(__GNUC__)
# include <excpt.h>
#endif

/* Make arguments appear live to compiler.  Put here to minimize the   */
/* risk of inlining.  Used to minimize junk left in registers.         */
GC_ATTR_NOINLINE
void GC_noop6(word arg1 GC_ATTR_UNUSED, word arg2 GC_ATTR_UNUSED,
              word arg3 GC_ATTR_UNUSED, word arg4 GC_ATTR_UNUSED,
              word arg5 GC_ATTR_UNUSED, word arg6 GC_ATTR_UNUSED)
{
  /* Prevent GC_noop6 calls from being optimized away.  */
# if defined(AO_HAVE_compiler_barrier) && !defined(BASE_ATOMIC_OPS_EMULATED)
    AO_compiler_barrier(); /* to serve as a special side-effect */
# else
    GC_noop1(0);
# endif
}

volatile word GC_noop_sink;

/* Single argument version, robust against whole program analysis. */
GC_ATTR_NO_SANITIZE_THREAD
GC_API void GC_CALL GC_noop1(word x)
{
  GC_noop_sink = x;
}

/* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */

GC_INNER unsigned GC_n_mark_procs = GC_RESERVED_MARK_PROCS;

/* Initialize GC_obj_kinds and the standard free lists.        */
/* This must be done statically since they may be accessed     */
/* before GC_init is called.                                   */
/* It's done here, since we need to deal with mark descriptors. */
GC_INNER struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
/* PTRFREE */ { &GC_aobjfreelist[0], 0 /* filled in dynamically */,
                /* 0 | */ GC_DS_LENGTH, FALSE, FALSE
                /*, */ OK_DISCLAIM_INITZ },
/* NORMAL */  { &GC_objfreelist[0], 0,
                /* 0 | */ GC_DS_LENGTH,
                                /* adjusted in GC_init for EXTRA_BYTES  */
                TRUE /* add length to descr */, TRUE
                /*, */ OK_DISCLAIM_INITZ },
/* UNCOLLECTABLE */
              { &GC_uobjfreelist[0], 0,
                /* 0 | */ GC_DS_LENGTH, TRUE /* add length to descr */, TRUE
                /*, */ OK_DISCLAIM_INITZ },
# ifdef GC_ATOMIC_UNCOLLECTABLE
              { &GC_auobjfreelist[0], 0,
                /* 0 | */ GC_DS_LENGTH, FALSE /* add length to descr */, FALSE
                /*, */ OK_DISCLAIM_INITZ },
# endif
};

GC_INNER unsigned GC_n_kinds = GC_N_KINDS_INITIAL_VALUE;

# ifndef INITIAL_MARK_STACK_SIZE
#   define INITIAL_MARK_STACK_SIZE (1*HBLKSIZE)
                /* INITIAL_MARK_STACK_SIZE * sizeof(mse) should be a    */
                /* multiple of HBLKSIZE.                                */
                /* The incremental collector actually likes a larger    */
                /* size, since it wants to push all marked dirty        */
                /* objects before marking anything new.  Currently we   */
                /* let it grow dynamically.                             */
# endif
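/* For a sense of scale (an estimate, not a guarantee): with the       */
/* typical HBLKSIZE of 4096, INITIAL_MARK_STACK_SIZE is 4096 entries;  */
/* assuming a two-word mse, that is 32 KB of mark stack on 32-bit      */
/* targets and 64 KB on 64-bit ones.                                   */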

#if !defined(GC_DISABLE_INCREMENTAL)
  STATIC word GC_n_rescuing_pages = 0;
                                /* Number of dirty pages we marked from; */
                                /* excludes ptrfree pages, etc.          */
                                /* Used for logging only.                */
#endif

GC_INNER size_t GC_mark_stack_size = 0;

#ifdef PARALLEL_MARK
  STATIC volatile AO_t GC_first_nonempty = 0;
                        /* Lowest entry on mark stack   */
                        /* that may be nonempty.        */
                        /* Updated only by initiating   */
                        /* thread.                      */
#endif

GC_INNER mark_state_t GC_mark_state = MS_NONE;
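/* A short summary of the transitions driven by GC_initiate_gc and     */
/* GC_mark_some (see below); derived from the code, for orientation:   */
/*   MS_NONE -> MS_PUSH_RESCUERS (GC_initiate_gc)                      */
/*   MS_PUSH_RESCUERS -> MS_ROOTS_PUSHED (dirty blocks, then roots)    */
/*   MS_INVALID -> MS_PUSH_UNCOLLECTABLE (no marked objects yet)       */
/*   MS_INVALID -> MS_PARTIALLY_INVALID (heap rescan of marked blocks) */
/*   MS_PUSH_UNCOLLECTABLE / MS_PARTIALLY_INVALID -> MS_ROOTS_PUSHED   */
/*   MS_ROOTS_PUSHED -> MS_NONE (mark stack drained; marking done)     */
/* GC_invalidate_mark_state forces MS_INVALID at any point.            */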

GC_INNER GC_bool GC_mark_stack_too_small = FALSE;

static struct hblk * scan_ptr;

STATIC GC_bool GC_objects_are_marked = FALSE;
                /* Are there collectible marked objects in the heap?   */

/* Is a collection in progress?  Note that this can return true in the */
/* non-incremental case, if a collection has been abandoned and the    */
/* mark state is now MS_INVALID.                                       */
GC_INNER GC_bool GC_collection_in_progress(void)
{
  return(GC_mark_state != MS_NONE);
}

/* Clear all mark bits in the header.  */
GC_INNER void GC_clear_hdr_marks(hdr *hhdr)
{
  size_t last_bit;

# ifdef AO_HAVE_load
    /* Atomic access is used to avoid racing with GC_realloc.  */
    last_bit = FINAL_MARK_BIT((size_t)AO_load((volatile AO_t *)&hhdr->hb_sz));
# else
    /* No race as GC_realloc holds the lock while updating hb_sz.  */
    last_bit = FINAL_MARK_BIT((size_t)hhdr->hb_sz);
# endif

  BZERO(hhdr -> hb_marks, sizeof(hhdr->hb_marks));
  set_mark_bit_from_hdr(hhdr, last_bit);
  hhdr -> hb_n_marks = 0;
}

/* Set all mark bits in the header.  Used for uncollectible blocks.  */
GC_INNER void GC_set_hdr_marks(hdr *hhdr)
{
  unsigned i;
  size_t sz = (size_t)hhdr->hb_sz;
  unsigned n_marks = (unsigned)FINAL_MARK_BIT(sz);

# ifdef USE_MARK_BYTES
    for (i = 0; i <= n_marks; i += (unsigned)MARK_BIT_OFFSET(sz)) {
      hhdr -> hb_marks[i] = 1;
    }
# else
    for (i = 0; i < divWORDSZ(n_marks + WORDSZ); ++i) {
      hhdr -> hb_marks[i] = GC_WORD_MAX;
    }
# endif
# ifdef MARK_BIT_PER_OBJ
    hhdr -> hb_n_marks = n_marks;
# else
    hhdr -> hb_n_marks = HBLK_OBJS(sz);
# endif
}

/*
 * Clear all mark bits associated with block h.
 */
static void clear_marks_for_block(struct hblk *h, word dummy GC_ATTR_UNUSED)
{
  hdr * hhdr = HDR(h);

  if (IS_UNCOLLECTABLE(hhdr -> hb_obj_kind)) return;
        /* Mark bit for these is cleared only once the object is       */
        /* explicitly deallocated.  This either frees the block, or    */
        /* the bit is cleared once the object is on the free list.     */
  GC_clear_hdr_marks(hhdr);
}

/* Slow but general routines for setting/clearing/asking about mark bits. */
GC_API void GC_CALL GC_set_mark_bit(const void *p)
{
  struct hblk *h = HBLKPTR(p);
  hdr * hhdr = HDR(h);
  word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);

  if (!mark_bit_from_hdr(hhdr, bit_no)) {
    set_mark_bit_from_hdr(hhdr, bit_no);
    ++hhdr -> hb_n_marks;
  }
}

GC_API void GC_CALL GC_clear_mark_bit(const void *p)
{
  struct hblk *h = HBLKPTR(p);
  hdr * hhdr = HDR(h);
  word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);

  if (mark_bit_from_hdr(hhdr, bit_no)) {
    size_t n_marks = hhdr -> hb_n_marks;

    GC_ASSERT(n_marks != 0);
    clear_mark_bit_from_hdr(hhdr, bit_no);
    n_marks--;
#   ifdef PARALLEL_MARK
      if (n_marks != 0 || !GC_parallel)
        hhdr -> hb_n_marks = n_marks;
      /* Don't decrement to zero.  The counts are approximate due to   */
      /* concurrency issues, but we need to ensure that a count of     */
      /* zero implies an empty block.                                  */
#   else
      hhdr -> hb_n_marks = n_marks;
#   endif
  }
}

GC_API int GC_CALL GC_is_marked(const void *p)
{
  struct hblk *h = HBLKPTR(p);
  hdr * hhdr = HDR(h);
  word bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, hhdr -> hb_sz);

  return (int)mark_bit_from_hdr(hhdr, bit_no); /* 0 or 1 */
}

/*
 * Clear mark bits in all allocated heap blocks.  This invalidates
 * the marker invariant, and sets GC_mark_state to reflect this.
 * (This implicitly starts marking to reestablish the invariant.)
 */
GC_INNER void GC_clear_marks(void)
{
  GC_apply_to_all_blocks(clear_marks_for_block, (word)0);
  GC_objects_are_marked = FALSE;
  GC_mark_state = MS_INVALID;
  scan_ptr = 0;
}

/* Initiate a garbage collection.  Initiates a full collection if the  */
/* mark state is invalid.                                              */
GC_INNER void GC_initiate_gc(void)
{
  GC_ASSERT(I_HOLD_LOCK());
# ifndef GC_DISABLE_INCREMENTAL
    if (GC_incremental) {
#     ifdef CHECKSUMS
        GC_read_dirty(FALSE);
#     else
        GC_read_dirty(GC_mark_state == MS_INVALID);
#     endif
    }
# endif
# ifdef CHECKSUMS
    if (GC_incremental) GC_check_dirty();
# endif
# if !defined(GC_DISABLE_INCREMENTAL)
    GC_n_rescuing_pages = 0;
# endif
  if (GC_mark_state == MS_NONE) {
    GC_mark_state = MS_PUSH_RESCUERS;
  } else if (GC_mark_state != MS_INVALID) {
    ABORT("Unexpected state");
  } /* else this is really a full collection, and mark bits are invalid. */
  scan_ptr = 0;
}

#ifdef PARALLEL_MARK
  STATIC void GC_do_parallel_mark(void); /* initiates parallel marking */
#endif /* PARALLEL_MARK */

#ifdef GC_DISABLE_INCREMENTAL
# define GC_push_next_marked_dirty(h) GC_push_next_marked(h)
#else
  STATIC struct hblk * GC_push_next_marked_dirty(struct hblk *h);
                /* Invoke GC_push_marked on next dirty block above h.  */
                /* Return a pointer just past the end of this block.   */
#endif /* !GC_DISABLE_INCREMENTAL */
STATIC struct hblk * GC_push_next_marked(struct hblk *h);
                /* Ditto, but also mark from clean pages.              */
STATIC struct hblk * GC_push_next_marked_uncollectable(struct hblk *h);
                /* Ditto, but mark only from uncollectible pages.      */

static void alloc_mark_stack(size_t);

/* Perform a small amount of marking.                   */
/* We try to touch roughly a page of memory.            */
/* Return TRUE if we just finished a mark phase.        */
/* Cold_gc_frame is an address inside a GC frame that   */
/* remains valid until all marking is complete.         */
/* A zero value indicates that it's OK to miss some     */
/* register values.                                     */
/* We hold the allocation lock.  In the case of         */
/* incremental collection, the world may not be stopped.*/
#ifdef WRAP_MARK_SOME
  /* For Win32, this is called after we establish a structured  */
  /* exception handler, in case Windows unmaps one of our root  */
  /* segments.  See below.  In either case, we acquire the      */
  /* allocator lock long before we get here.                    */
  STATIC GC_bool GC_mark_some_inner(ptr_t cold_gc_frame)
#else
  GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame)
#endif
{
  switch(GC_mark_state) {
    case MS_NONE:
      break;

    case MS_PUSH_RESCUERS:
      if ((word)GC_mark_stack_top
          >= (word)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/2)) {
        /* Go ahead and mark, even though that might cause us to */
        /* see more marked dirty objects later on.  Avoid this   */
        /* in the future.                                        */
        GC_mark_stack_too_small = TRUE;
        MARK_FROM_MARK_STACK();
        break;
      } else {
        scan_ptr = GC_push_next_marked_dirty(scan_ptr);
        if (scan_ptr == 0) {
#         if !defined(GC_DISABLE_INCREMENTAL)
            GC_COND_LOG_PRINTF("Marked from %lu dirty pages\n",
                               (unsigned long)GC_n_rescuing_pages);
#         endif
          GC_push_roots(FALSE, cold_gc_frame);
          GC_objects_are_marked = TRUE;
          if (GC_mark_state != MS_INVALID) {
            GC_mark_state = MS_ROOTS_PUSHED;
          }
        }
      }
      break;

    case MS_PUSH_UNCOLLECTABLE:
      if ((word)GC_mark_stack_top
          >= (word)(GC_mark_stack + GC_mark_stack_size/4)) {
#       ifdef PARALLEL_MARK
          /* Avoid this, since we don't parallelize the marker here. */
          if (GC_parallel) GC_mark_stack_too_small = TRUE;
#       endif
        MARK_FROM_MARK_STACK();
        break;
      } else {
        scan_ptr = GC_push_next_marked_uncollectable(scan_ptr);
        if (scan_ptr == 0) {
          GC_push_roots(TRUE, cold_gc_frame);
          GC_objects_are_marked = TRUE;
          if (GC_mark_state != MS_INVALID) {
            GC_mark_state = MS_ROOTS_PUSHED;
          }
        }
      }
      break;

    case MS_ROOTS_PUSHED:
#     ifdef PARALLEL_MARK
        /* In the incremental GC case, this currently doesn't    */
        /* quite do the right thing, since it runs to            */
        /* completion.  On the other hand, starting a            */
        /* parallel marker is expensive, so perhaps it is        */
        /* the right thing?                                      */
        /* Eventually, incremental marking should run            */
        /* asynchronously in multiple threads, without grabbing  */
        /* the allocation lock.                                  */
        if (GC_parallel) {
          GC_do_parallel_mark();
          GC_ASSERT((word)GC_mark_stack_top < (word)GC_first_nonempty);
          GC_mark_stack_top = GC_mark_stack - 1;
          if (GC_mark_stack_too_small) {
            alloc_mark_stack(2*GC_mark_stack_size);
          }
          if (GC_mark_state == MS_ROOTS_PUSHED) {
            GC_mark_state = MS_NONE;
            return(TRUE);
          }
          break;
        }
#     endif
      if ((word)GC_mark_stack_top >= (word)GC_mark_stack) {
        MARK_FROM_MARK_STACK();
        break;
      } else {
        GC_mark_state = MS_NONE;
        if (GC_mark_stack_too_small) {
          alloc_mark_stack(2*GC_mark_stack_size);
        }
        return(TRUE);
      }

    case MS_INVALID:
    case MS_PARTIALLY_INVALID:
      if (!GC_objects_are_marked) {
        GC_mark_state = MS_PUSH_UNCOLLECTABLE;
        break;
      }
      if ((word)GC_mark_stack_top >= (word)GC_mark_stack) {
        MARK_FROM_MARK_STACK();
        break;
      }
      if (scan_ptr == 0 && GC_mark_state == MS_INVALID) {
        /* About to start a heap scan for marked objects.       */
        /* Mark stack is empty.  OK to reallocate.              */
        if (GC_mark_stack_too_small) {
          alloc_mark_stack(2*GC_mark_stack_size);
        }
        GC_mark_state = MS_PARTIALLY_INVALID;
      }
      scan_ptr = GC_push_next_marked(scan_ptr);
      if (scan_ptr == 0 && GC_mark_state == MS_PARTIALLY_INVALID) {
        GC_push_roots(TRUE, cold_gc_frame);
        GC_objects_are_marked = TRUE;
        if (GC_mark_state != MS_INVALID) {
          GC_mark_state = MS_ROOTS_PUSHED;
        }
      }
      break;

    default:
      ABORT("GC_mark_some: bad state");
  }
  return(FALSE);
}

#ifdef WRAP_MARK_SOME

# if (defined(MSWIN32) || defined(MSWINCE)) && defined(__GNUC__)

    typedef struct {
      EXCEPTION_REGISTRATION ex_reg;
      void *alt_path;
    } ext_ex_regn;
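    /* Note: ex_reg must stay the first field, since mark_ex_handler   */
    /* below casts the registration frame pointer (est_frame) back to  */
    /* ext_ex_regn * in order to reach alt_path.                       */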

    static EXCEPTION_DISPOSITION mark_ex_handler(
                        struct _EXCEPTION_RECORD *ex_rec,
                        void *est_frame,
                        struct _CONTEXT *context,
                        void *disp_ctxt GC_ATTR_UNUSED)
    {
      if (ex_rec->ExceptionCode == STATUS_ACCESS_VIOLATION) {
        ext_ex_regn *xer = (ext_ex_regn *)est_frame;

        /* Unwind from the inner function, assuming the standard  */
        /* function prologue.  Assumes the code has not been      */
        /* compiled with -fomit-frame-pointer.                    */
        context->Esp = context->Ebp;
        context->Ebp = *((DWORD *)context->Esp);
        context->Esp = context->Esp - 8;

        /* Resume execution at the "real" handler within the      */
        /* wrapper function.                                      */
        context->Eip = (DWORD)(xer->alt_path);

        return ExceptionContinueExecution;

      } else {
        return ExceptionContinueSearch;
      }
    }
# endif /* __GNUC__ && MSWIN32 */

GC_INNER GC_bool GC_mark_some(ptr_t cold_gc_frame)
{
  GC_bool ret_val;

# if defined(MSWIN32) || defined(MSWINCE)
#   ifndef __GNUC__
      /* Windows 98 appears to asynchronously create and remove  */
      /* writable memory mappings, for reasons we haven't yet    */
      /* understood.  Since we look for writable regions to      */
      /* determine the root set, we may try to mark from an      */
      /* address range that disappeared since we started the     */
      /* collection.  Thus we have to recover from faults here.  */
      /* This code does not appear to be necessary for Windows   */
      /* 95/NT/2000+.  Note that this code should never generate */
      /* an incremental GC write fault.                          */
      /* This code seems to be necessary for WinCE (at least in  */
      /* the case we'd decide to add MEM_PRIVATE sections to     */
      /* data roots in GC_register_dynamic_libraries()).         */
      /* It's conceivable that this is the same issue as with    */
      /* terminating threads that we see with Linux and          */
      /* USE_PROC_FOR_LIBRARIES.                                 */

      __try {
        ret_val = GC_mark_some_inner(cold_gc_frame);
      } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
                  EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
        goto handle_ex;
      }
#     if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
        /* With DllMain-based thread tracking, a thread may have        */
        /* started while we were marking.  This is logically equivalent */
        /* to the exception case; our results are invalid and we have   */
        /* to start over.  This cannot be prevented since we can't      */
        /* block in DllMain.                                            */
        if (GC_started_thread_while_stopped())
          goto handle_thr_start;
#     endif
     rm_handler:
      return ret_val;

#   else /* __GNUC__ */

      /* Manually install an exception handler since GCC does    */
      /* not yet support Structured Exception Handling (SEH) on  */
      /* Win32.                                                  */

      ext_ex_regn er;

#     if GC_GNUC_PREREQ(4, 7) || GC_CLANG_PREREQ(3, 3)
#       pragma GCC diagnostic push
        /* Suppress "taking the address of label is non-standard" warning. */
#       if defined(__clang__) || GC_GNUC_PREREQ(6, 4)
#         pragma GCC diagnostic ignored "-Wpedantic"
#       else
          /* GCC before ~4.8 does not accept "-Wpedantic" quietly.  */
#         pragma GCC diagnostic ignored "-pedantic"
#       endif
        er.alt_path = &&handle_ex;
#       pragma GCC diagnostic pop
#     elif !defined(CPPCHECK) /* pragma diagnostic is not supported */
        er.alt_path = &&handle_ex;
#     endif
      er.ex_reg.handler = mark_ex_handler;
      __asm__ __volatile__ ("movl %%fs:0, %0" : "=r" (er.ex_reg.prev));
      __asm__ __volatile__ ("movl %0, %%fs:0" : : "r" (&er));
      ret_val = GC_mark_some_inner(cold_gc_frame);
      /* Prevent GCC from considering the following code unreachable */
      /* and thus eliminating it.                                    */
      if (er.alt_path == 0)
        goto handle_ex;
#     if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
        if (GC_started_thread_while_stopped())
          goto handle_thr_start;
#     endif
     rm_handler:
      /* Uninstall the exception handler.  */
      __asm__ __volatile__ ("mov %0, %%fs:0" : : "r" (er.ex_reg.prev));
      return ret_val;

#   endif /* __GNUC__ */
# else /* !MSWIN32 */
    /* Here we are handling the case in which /proc is used for root  */
    /* finding, and we have threads.  We may find a stack for a       */
    /* thread that is in the process of exiting, and disappears       */
    /* while we are marking it.  This seems extremely difficult to    */
    /* avoid otherwise.                                               */
#   ifndef DEFAULT_VDB
      if (GC_auto_incremental) {
        static GC_bool is_warned = FALSE;

        if (!is_warned) {
          is_warned = TRUE;
          WARN("Incremental GC incompatible with /proc roots\n", 0);
        }
        /* I'm not sure if this could still work ...  */
      }
#   endif
    GC_setup_temporary_fault_handler();
    if (SETJMP(GC_jmp_buf) != 0) goto handle_ex;
    ret_val = GC_mark_some_inner(cold_gc_frame);
   rm_handler:
    GC_reset_fault_handler();
    return ret_val;

# endif /* !MSWIN32 */

 handle_ex:
  /* Exception handler starts here for all cases.  */
  {
    static word warned_gc_no;

    /* Warn about it at most once per collection.  */
    if (warned_gc_no != GC_gc_no) {
      warned_gc_no = GC_gc_no;
      WARN("Caught ACCESS_VIOLATION in marker;"
           " memory mapping disappeared\n", 0);
    }
  }
# if (defined(MSWIN32) || defined(MSWINCE)) && defined(GC_WIN32_THREADS) \
     && !defined(GC_PTHREADS)
   handle_thr_start:
# endif
  /* We have bad roots on the stack.  Discard the mark stack.  */
  /* Rescan from marked objects.  Redetermine roots.           */
# ifdef REGISTER_LIBRARIES_EARLY
    START_WORLD();
    GC_cond_register_dynamic_libraries();
    STOP_WORLD();
# endif
  GC_invalidate_mark_state();
  scan_ptr = 0;

  ret_val = FALSE;
  goto rm_handler; /* Back to platform-specific code. */
}
#endif /* WRAP_MARK_SOME */

GC_INNER void GC_invalidate_mark_state(void)
{
  GC_mark_state = MS_INVALID;
  GC_mark_stack_top = GC_mark_stack - 1;
}

GC_INNER mse * GC_signal_mark_stack_overflow(mse *msp)
{
  GC_mark_state = MS_INVALID;
# ifdef PARALLEL_MARK
    /* We are using a local_mark_stack in parallel mode, so     */
    /* do not signal the global mark stack to be resized.       */
    /* That will be done if required in GC_return_mark_stack.   */
    if (!GC_parallel)
      GC_mark_stack_too_small = TRUE;
# else
    GC_mark_stack_too_small = TRUE;
# endif
  GC_COND_LOG_PRINTF("Mark stack overflow; current size = %lu entries\n",
                     (unsigned long)GC_mark_stack_size);
  return(msp - GC_MARK_STACK_DISCARDS);
}

/*
 * Mark objects pointed to by the regions described by
 * mark stack entries between mark_stack and mark_stack_top,
 * inclusive.  Assumes the upper limit of a mark stack entry
 * is never 0.  A mark stack entry never has size 0.
 * We try to traverse on the order of an hblk of memory before we return.
 * Caller is responsible for calling this until the mark stack is empty.
 * Note that this is the most performance-critical routine in the
 * collector.  Hence it contains all sorts of ugly hacks to speed
 * things up.  In particular, we avoid procedure calls on the common
 * path, we take advantage of peculiarities of the mark descriptor
 * encoding, we optionally maintain a cache for the block address to
 * header mapping, we prefetch when an object is "grayed", etc.
 */
GC_ATTR_NO_SANITIZE_ADDR GC_ATTR_NO_SANITIZE_MEMORY GC_ATTR_NO_SANITIZE_THREAD
GC_INNER mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack,
                            mse *mark_stack_limit)
{
  signed_word credit = HBLKSIZE;  /* Remaining credit for marking work. */
  ptr_t current_p;      /* Pointer to current candidate ptr.            */
  word current;         /* Candidate pointer.                           */
  ptr_t limit = 0;      /* (Incl) limit of current candidate range.     */
  word descr;
  ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
  DECLARE_HDR_CACHE;

# define SPLIT_RANGE_WORDS 128 /* Must be power of 2. */

  GC_objects_are_marked = TRUE;
  INIT_HDR_CACHE;
# ifdef OS2 /* Use untweaked version to circumvent compiler problem. */
    while ((word)mark_stack_top >= (word)mark_stack && credit >= 0)
# else
    while (((((word)mark_stack_top - (word)mark_stack) | (word)credit)
            & SIGNB) == 0)
# endif
  {
    current_p = mark_stack_top -> mse_start;
    descr = mark_stack_top -> mse_descr.w;
  retry:
    /* current_p and descr describe the current object.                */
    /* *mark_stack_top is vacant.                                      */
    /* The following is 0 only for small objects described by a simple */
    /* length descriptor.  For many applications this is the common    */
    /* case, so we try to detect it quickly.                           */
    if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | GC_DS_TAGS)) {
      word tag = descr & GC_DS_TAGS;

      GC_STATIC_ASSERT(GC_DS_TAGS == 0x3);
      switch(tag) {
        case GC_DS_LENGTH:
          /* Large length.                                              */
          /* Process part of the range to avoid pushing too much on the */
          /* stack.                                                     */
          GC_ASSERT(descr < (word)GC_greatest_plausible_heap_addr
                            - (word)GC_least_plausible_heap_addr
                    || (word)(current_p + descr)
                        <= (word)GC_least_plausible_heap_addr
                    || (word)current_p >= (word)GC_greatest_plausible_heap_addr);
#         ifdef PARALLEL_MARK
#           define SHARE_BYTES 2048
            if (descr > SHARE_BYTES && GC_parallel
                && (word)mark_stack_top < (word)(mark_stack_limit - 1)) {
              word new_size = (descr/2) & ~(word)(sizeof(word)-1);

              mark_stack_top -> mse_start = current_p;
              mark_stack_top -> mse_descr.w = new_size + sizeof(word);
                                        /* Makes sure we handle */
                                        /* misaligned pointers. */
              mark_stack_top++;
#             ifdef ENABLE_TRACE
                if ((word)GC_trace_addr >= (word)current_p
                    && (word)GC_trace_addr < (word)(current_p + descr)) {
                  GC_log_printf("GC #%u: large section; start %p, len %lu,"
                                " splitting (parallel) at %p\n",
                                (unsigned)GC_gc_no, (void *)current_p,
                                (unsigned long)descr,
                                (void *)(current_p + new_size));
                }
#             endif
              current_p += new_size;
              descr -= new_size;
              goto retry;
            }
#         endif /* PARALLEL_MARK */
          mark_stack_top -> mse_start =
                limit = current_p + WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
          mark_stack_top -> mse_descr.w =
                                descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
#         ifdef ENABLE_TRACE
            if ((word)GC_trace_addr >= (word)current_p
                && (word)GC_trace_addr < (word)(current_p + descr)) {
              GC_log_printf("GC #%u: large section; start %p, len %lu,"
                            " splitting at %p\n",
                            (unsigned)GC_gc_no, (void *)current_p,
                            (unsigned long)descr, (void *)limit);
            }
#         endif
          /* Make sure that pointers overlapping the two ranges are    */
          /* considered.                                               */
          limit += sizeof(word) - ALIGNMENT;
          break;
        case GC_DS_BITMAP:
          mark_stack_top--;
#         ifdef ENABLE_TRACE
            if ((word)GC_trace_addr >= (word)current_p
                && (word)GC_trace_addr < (word)(current_p
                                                + WORDS_TO_BYTES(WORDSZ-2))) {
              GC_log_printf("GC #%u: tracing from %p bitmap descr %lu\n",
                            (unsigned)GC_gc_no, (void *)current_p,
                            (unsigned long)descr);
            }
#         endif /* ENABLE_TRACE */
          descr &= ~GC_DS_TAGS;
          credit -= WORDS_TO_BYTES(WORDSZ/2); /* guess */
          while (descr != 0) {
            if ((descr & SIGNB) != 0) {
              current = *(word *)current_p;
              FIXUP_POINTER(current);
              if (current >= (word)least_ha && current < (word)greatest_ha) {
                PREFETCH((ptr_t)current);
#               ifdef ENABLE_TRACE
                  if (GC_trace_addr == current_p) {
                    GC_log_printf("GC #%u: considering(3) %p -> %p\n",
                                  (unsigned)GC_gc_no, (void *)current_p,
                                  (void *)current);
                  }
#               endif /* ENABLE_TRACE */
                PUSH_CONTENTS((ptr_t)current, mark_stack_top,
                              mark_stack_limit, current_p);
              }
            }
            descr <<= 1;
            current_p += sizeof(word);
          }
          continue;
        case GC_DS_PROC:
          mark_stack_top--;
#         ifdef ENABLE_TRACE
            if ((word)GC_trace_addr >= (word)current_p
                && GC_base(current_p) != 0
                && GC_base(current_p) == GC_base(GC_trace_addr)) {
              GC_log_printf("GC #%u: tracing from %p, proc descr %lu\n",
                            (unsigned)GC_gc_no, (void *)current_p,
                            (unsigned long)descr);
            }
#         endif /* ENABLE_TRACE */
          credit -= GC_PROC_BYTES;
          mark_stack_top = (*PROC(descr))((word *)current_p, mark_stack_top,
                                          mark_stack_limit, ENV(descr));
          continue;
        case GC_DS_PER_OBJECT:
          if ((signed_word)descr >= 0) {
            /* Descriptor is in the object.  */
            descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
          } else {
            /* Descriptor is in the type descriptor pointed to by the   */
            /* first word of the object.                                */
            ptr_t type_descr = *(ptr_t *)current_p;
            /* type_descr is either a valid pointer to the descriptor   */
            /* structure, or this object was on a free list.            */
            /* If it was anything but the last object on the free list, */
            /* we will misinterpret the next object on the free list as */
            /* the type descriptor, and get a 0 GC descriptor, which    */
            /* is ideal.  Unfortunately, we need to check for the last  */
            /* object case explicitly.                                  */
            if (EXPECT(0 == type_descr, FALSE)) {
              mark_stack_top--;
              continue;
            }
            descr = *(word *)(type_descr
                              - ((signed_word)descr + (GC_INDIR_PER_OBJ_BIAS
                                                       - GC_DS_PER_OBJECT)));
          }
          if (0 == descr) {
            /* Can happen either because we generated a 0 descriptor    */
            /* or we saw a pointer to a free object.                    */
            mark_stack_top--;
            continue;
          }
          goto retry;
      }
    } else /* Small object with length descriptor */ {
      mark_stack_top--;
#     ifndef SMALL_CONFIG
        if (descr < sizeof(word))
          continue;
#     endif
#     ifdef ENABLE_TRACE
        if ((word)GC_trace_addr >= (word)current_p
            && (word)GC_trace_addr < (word)(current_p + descr)) {
          GC_log_printf("GC #%u: small object; start %p, len %lu\n",
                        (unsigned)GC_gc_no, (void *)current_p,
                        (unsigned long)descr);
        }
#     endif
      limit = current_p + (word)descr;
    }
    /* The simple case in which we're scanning a range.  */
    GC_ASSERT(!((word)current_p & (ALIGNMENT-1)));
    credit -= limit - current_p;
    limit -= sizeof(word);
    {
#     define PREF_DIST 4

#     ifndef SMALL_CONFIG
        word deferred;

        /* Try to prefetch the next pointer to be examined ASAP.      */
        /* Empirically, this also seems to help slightly without      */
        /* prefetches, at least on linux/x86.  Presumably this loop   */
        /* ends up with less register pressure, and gcc thus ends up  */
        /* generating slightly better code.  Overall gcc code quality */
        /* for this loop is still not great.                          */
        for (;;) {
          PREFETCH(limit - PREF_DIST*CACHE_LINE_SIZE);
          GC_ASSERT((word)limit >= (word)current_p);
          deferred = *(word *)limit;
          FIXUP_POINTER(deferred);
          limit -= ALIGNMENT;
          if (deferred >= (word)least_ha && deferred < (word)greatest_ha) {
            PREFETCH((ptr_t)deferred);
            break;
          }
          if ((word)current_p > (word)limit) goto next_object;
          /* Unroll once, so we don't do too many of the prefetches   */
          /* based on limit.                                          */
          deferred = *(word *)limit;
          FIXUP_POINTER(deferred);
          limit -= ALIGNMENT;
          if (deferred >= (word)least_ha && deferred < (word)greatest_ha) {
            PREFETCH((ptr_t)deferred);
            break;
          }
          if ((word)current_p > (word)limit) goto next_object;
        }
#     endif

      while ((word)current_p <= (word)limit) {
        /* Empirically, unrolling this loop doesn't help a lot.       */
        /* Since PUSH_CONTENTS expands to a lot of code,              */
        /* we don't.                                                  */
        current = *(word *)current_p;
        FIXUP_POINTER(current);
        PREFETCH(current_p + PREF_DIST*CACHE_LINE_SIZE);
        if (current >= (word)least_ha && current < (word)greatest_ha) {
          /* Prefetch the contents of the object we just pushed.  It's */
          /* likely we will need them soon.                            */
          PREFETCH((ptr_t)current);
#         ifdef ENABLE_TRACE
            if (GC_trace_addr == current_p) {
              GC_log_printf("GC #%u: considering(1) %p -> %p\n",
                            (unsigned)GC_gc_no, (void *)current_p,
                            (void *)current);
            }
#         endif /* ENABLE_TRACE */
          PUSH_CONTENTS((ptr_t)current, mark_stack_top,
                        mark_stack_limit, current_p);
        }
        current_p += ALIGNMENT;
      }

#     ifndef SMALL_CONFIG
        /* We still need to mark the entry we previously prefetched.  */
        /* We already know that it passes the preliminary pointer     */
        /* validity test.                                             */
#       ifdef ENABLE_TRACE
          if (GC_trace_addr == current_p) {
            GC_log_printf("GC #%u: considering(2) %p -> %p\n",
                          (unsigned)GC_gc_no, (void *)current_p,
                          (void *)deferred);
          }
#       endif /* ENABLE_TRACE */
        PUSH_CONTENTS((ptr_t)deferred, mark_stack_top,
                      mark_stack_limit, current_p);
        next_object:;
#     endif
    }
  }
  return mark_stack_top;
}

#ifdef PARALLEL_MARK

STATIC GC_bool GC_help_wanted = FALSE;  /* Protected by mark lock.      */
STATIC unsigned GC_helper_count = 0;    /* Number of running helpers.   */
                                        /* Protected by mark lock.      */
STATIC unsigned GC_active_count = 0;    /* Number of active helpers.    */
                                        /* Protected by mark lock.      */
                                        /* May increase and decrease    */
                                        /* within each mark cycle.  But */
                                        /* once it returns to 0, it     */
                                        /* stays zero for the cycle.    */

GC_INNER word GC_mark_no = 0;

static mse *main_local_mark_stack;

#ifdef LINT2
# define LOCAL_MARK_STACK_SIZE (HBLKSIZE / 8)
#else
# define LOCAL_MARK_STACK_SIZE HBLKSIZE
        /* Under normal circumstances, this is big enough to guarantee */
        /* we don't overflow half of it in a single call to            */
        /* GC_mark_from.                                               */
#endif

/* Wait for all markers to finish initialization (i.e. store   */
/* marker_[b]sp, marker_mach_threads, GC_marker_Id).           */
GC_INNER void GC_wait_for_markers_init(void)
{
  signed_word count;

  if (GC_markers_m1 == 0)
    return;

  /* Allocate the local mark stack for the thread holding the GC lock. */
# ifndef CAN_HANDLE_FORK
    GC_ASSERT(NULL == main_local_mark_stack);
# else
    if (NULL == main_local_mark_stack)
# endif
  {
    size_t bytes_to_get =
                ROUNDUP_PAGESIZE_IF_MMAP(LOCAL_MARK_STACK_SIZE * sizeof(mse));
    main_local_mark_stack = (mse *)GET_MEM(bytes_to_get);
    if (NULL == main_local_mark_stack)
      ABORT("Insufficient memory for main local_mark_stack");
    GC_add_to_our_memory((ptr_t)main_local_mark_stack, bytes_to_get);
  }

  /* Reuse marker lock and builders count to synchronize       */
  /* marker threads startup.                                   */
  GC_acquire_mark_lock();
  GC_fl_builder_count += GC_markers_m1;
  count = GC_fl_builder_count;
  GC_release_mark_lock();
  if (count != 0) {
    GC_ASSERT(count > 0);
    GC_wait_for_reclaim();
  }
}

/* Steal mark stack entries, starting at mse low, into the local mark  */
/* stack, until we either steal mse high, or we have max entries.      */
/* Return a pointer to the top of the local mark stack.                */
/* *next is replaced by a pointer to the next unscanned mark stack     */
/* entry.                                                              */
STATIC mse * GC_steal_mark_stack(mse * low, mse * high, mse * local,
                                 unsigned max, mse **next)
{
  mse *p;
  mse *top = local - 1;
  unsigned i = 0;

  GC_ASSERT((word)high >= (word)(low - 1)
            && (word)(high - low + 1) <= GC_mark_stack_size);
  for (p = low; (word)p <= (word)high && i <= max; ++p) {
    word descr = (word)AO_load(&p->mse_descr.ao);
    if (descr != 0) {
      /* Must be ordered after read of descr: */
      AO_store_release_write(&p->mse_descr.ao, 0);
      /* More than one thread may get this entry, but that's only */
      /* a minor performance problem.                             */
      ++top;
      top -> mse_descr.w = descr;
      top -> mse_start = p -> mse_start;
      GC_ASSERT((descr & GC_DS_TAGS) != GC_DS_LENGTH
                || descr < (word)GC_greatest_plausible_heap_addr
                           - (word)GC_least_plausible_heap_addr
                || (word)(p->mse_start + descr)
                        <= (word)GC_least_plausible_heap_addr
                || (word)p->mse_start
                        >= (word)GC_greatest_plausible_heap_addr);
      /* If this is a big object, count it as size/256 + 1 objects. */
      ++i;
      if ((descr & GC_DS_TAGS) == GC_DS_LENGTH) i += (int)(descr >> 8);
    }
  }
  *next = p;
  return top;
}
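/* Illustrative arithmetic: stealing a GC_DS_LENGTH entry describing   */
/* 4096 bytes advances i by 1 + (4096 >> 8) = 17, so one large range   */
/* can satisfy most of a helper's max quota by itself.                 */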

/* Copy back a local mark stack.        */
/* low and high are inclusive bounds.   */
STATIC void GC_return_mark_stack(mse * low, mse * high)
{
  mse * my_top;
  mse * my_start;
  size_t stack_size;

  if ((word)high < (word)low) return;
  stack_size = high - low + 1;
  GC_acquire_mark_lock();
  my_top = GC_mark_stack_top; /* Concurrent modification impossible. */
  my_start = my_top + 1;
  if ((word)(my_start - GC_mark_stack + stack_size)
      > (word)GC_mark_stack_size) {
    GC_COND_LOG_PRINTF("No room to copy back mark stack\n");
    GC_mark_state = MS_INVALID;
    GC_mark_stack_too_small = TRUE;
    /* We drop the local mark stack.  We'll fix things later.  */
  } else {
    BCOPY(low, my_start, stack_size * sizeof(mse));
    GC_ASSERT((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
              == my_top);
    AO_store_release_write((volatile AO_t *)(&GC_mark_stack_top),
                           (AO_t)(my_top + stack_size));
                /* Ensures visibility of previously written stack contents. */
  }
  GC_release_mark_lock();
  GC_notify_all_marker();
}

#ifndef N_LOCAL_ITERS
# define N_LOCAL_ITERS 1
#endif

/* This function is only called when the local  */
/* and the main mark stacks are both empty.     */
static GC_bool has_inactive_helpers(void)
{
  GC_bool res;

  GC_acquire_mark_lock();
  res = GC_active_count < GC_helper_count;
  GC_release_mark_lock();
  return res;
}

/* Mark from the local mark stack.              */
/* On return, the local mark stack is empty.    */
/* But this may be achieved by copying the      */
/* local mark stack back into the global one.   */
/* We do not hold the mark lock.                */
STATIC void GC_do_local_mark(mse *local_mark_stack, mse *local_top)
{
  unsigned n;

  for (;;) {
    for (n = 0; n < N_LOCAL_ITERS; ++n) {
      local_top = GC_mark_from(local_top, local_mark_stack,
                               local_mark_stack + LOCAL_MARK_STACK_SIZE);
      if ((word)local_top < (word)local_mark_stack) return;
      if ((word)(local_top - local_mark_stack)
          >= LOCAL_MARK_STACK_SIZE / 2) {
        GC_return_mark_stack(local_mark_stack, local_top);
        return;
      }
    }
    if ((word)AO_load((volatile AO_t *)&GC_mark_stack_top)
        < (word)AO_load(&GC_first_nonempty)
        && (word)local_top > (word)(local_mark_stack + 1)
        && has_inactive_helpers()) {
      /* Try to share the load, since the main stack is empty,  */
      /* and helper threads are waiting for a refill.           */
      /* The entries near the bottom of the stack are likely    */
      /* to require more work.  Thus we return those, even      */
      /* though it's harder.                                    */
      mse * new_bottom = local_mark_stack
                         + (local_top - local_mark_stack)/2;
      GC_ASSERT((word)new_bottom > (word)local_mark_stack
                && (word)new_bottom < (word)local_top);
      GC_return_mark_stack(local_mark_stack, new_bottom - 1);
      memmove(local_mark_stack, new_bottom,
              (local_top - new_bottom + 1) * sizeof(mse));
      local_top -= (new_bottom - local_mark_stack);
    }
  }
}

#ifndef ENTRIES_TO_GET
# define ENTRIES_TO_GET 5
#endif

/* Mark using the local mark stack until the global mark stack is empty */
/* and there are no active workers.  Update GC_first_nonempty to reflect */
/* progress.  Caller holds the mark lock.                                */
/* Caller has already incremented GC_helper_count.  We decrement it,     */
/* and maintain GC_active_count.                                         */
STATIC void GC_mark_local(mse *local_mark_stack, int id)
{
  mse * my_first_nonempty;

  GC_active_count++;
  my_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
  GC_ASSERT((word)GC_mark_stack <= (word)my_first_nonempty);
  GC_ASSERT((word)my_first_nonempty
        <= (word)AO_load((volatile AO_t *)&GC_mark_stack_top) + sizeof(mse));
  GC_VERBOSE_LOG_PRINTF("Starting mark helper %d\n", id);
  GC_release_mark_lock();
  for (;;) {
    size_t n_on_stack;
    unsigned n_to_get;
    mse * my_top;
    mse * local_top;
    mse * global_first_nonempty = (mse *)AO_load(&GC_first_nonempty);

    GC_ASSERT((word)my_first_nonempty >= (word)GC_mark_stack &&
              (word)my_first_nonempty <=
                        (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
                        + sizeof(mse));
    GC_ASSERT((word)global_first_nonempty >= (word)GC_mark_stack);
    if ((word)my_first_nonempty < (word)global_first_nonempty) {
      my_first_nonempty = global_first_nonempty;
    } else if ((word)global_first_nonempty < (word)my_first_nonempty) {
      (void)AO_compare_and_swap(&GC_first_nonempty,
                                (AO_t)global_first_nonempty,
                                (AO_t)my_first_nonempty);
      /* If this fails, we just go ahead, without updating     */
      /* GC_first_nonempty.                                    */
    }
    /* Perhaps we should also update GC_first_nonempty, if it  */
    /* is less.  But that would require using atomic updates.  */
    my_top = (mse *)AO_load_acquire((volatile AO_t *)(&GC_mark_stack_top));
    if ((word)my_top < (word)my_first_nonempty) {
      GC_acquire_mark_lock();
      my_top = GC_mark_stack_top;
                /* Asynchronous modification impossible here,  */
                /* since we hold the mark lock.                */
      n_on_stack = my_top - my_first_nonempty + 1;
      if (0 == n_on_stack) {
        GC_active_count--;
        GC_ASSERT(GC_active_count <= GC_helper_count);
        /* Other markers may redeposit objects on the stack.   */
        if (0 == GC_active_count) GC_notify_all_marker();
        while (GC_active_count > 0
               && (word)AO_load(&GC_first_nonempty)
                  > (word)GC_mark_stack_top) {
          /* We will be notified if either GC_active_count     */
          /* reaches zero, or if more objects are pushed on    */
          /* the global mark stack.                            */
          GC_wait_marker();
        }
        if (GC_active_count == 0
            && (word)AO_load(&GC_first_nonempty)
               > (word)GC_mark_stack_top) {
          GC_bool need_to_notify = FALSE;
          /* The above conditions can't be falsified while we  */
          /* hold the mark lock, since neither GC_active_count */
          /* nor GC_mark_stack_top can change.                 */
          /* GC_first_nonempty can only be incremented         */
          /* asynchronously.  Thus we know that both           */
          /* conditions actually held simultaneously.          */
          GC_helper_count--;
          if (0 == GC_helper_count) need_to_notify = TRUE;
          GC_VERBOSE_LOG_PRINTF("Finished mark helper %d\n", id);
          if (need_to_notify) GC_notify_all_marker();
          return;
        }
        /* Else there's something on the stack again, or       */
        /* another helper may push something.                  */
        GC_active_count++;
        GC_ASSERT(GC_active_count > 0);
        GC_release_mark_lock();
        continue;
      } else {
        GC_release_mark_lock();
      }
    } else {
      n_on_stack = my_top - my_first_nonempty + 1;
    }
    n_to_get = ENTRIES_TO_GET;
    if (n_on_stack < 2 * ENTRIES_TO_GET) n_to_get = 1;
    local_top = GC_steal_mark_stack(my_first_nonempty, my_top,
                                    local_mark_stack, n_to_get,
                                    &my_first_nonempty);
    GC_ASSERT((word)my_first_nonempty >= (word)GC_mark_stack &&
              (word)my_first_nonempty <=
                        (word)AO_load((volatile AO_t *)&GC_mark_stack_top)
                        + sizeof(mse));
    GC_do_local_mark(local_mark_stack, local_top);
  }
}

/* Perform parallel mark.                       */
/* We hold the GC lock, not the mark lock.      */
/* Currently runs until the mark stack is       */
/* empty.                                       */
STATIC void GC_do_parallel_mark(void)
{
  GC_acquire_mark_lock();
  GC_ASSERT(I_HOLD_LOCK());
  /* This could be a GC_ASSERT, but it seems safer to keep it on */
  /* all the time, especially since it's cheap.                  */
  if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
    ABORT("Tried to start parallel mark in bad state");
  GC_VERBOSE_LOG_PRINTF("Starting marking for mark phase number %lu\n",
                        (unsigned long)GC_mark_no);
  GC_first_nonempty = (AO_t)GC_mark_stack;
  GC_active_count = 0;
  GC_helper_count = 1;
  GC_help_wanted = TRUE;
  GC_notify_all_marker();
        /* Wake up potential helpers.   */
  GC_mark_local(main_local_mark_stack, 0);
  GC_help_wanted = FALSE;
  /* Done; clean up.  */
  while (GC_helper_count > 0) {
    GC_wait_marker();
  }
  /* GC_helper_count cannot be incremented while GC_help_wanted == FALSE. */
  GC_VERBOSE_LOG_PRINTF("Finished marking for mark phase number %lu\n",
                        (unsigned long)GC_mark_no);
  GC_mark_no++;
  GC_release_mark_lock();
  GC_notify_all_marker();
}


/* Try to help out the marker, if it's running.         */
/* We do not hold the GC lock, but the requestor does.  */
/* And we hold the mark lock.                           */
GC_INNER void GC_help_marker(word my_mark_no)
{
# define my_id my_id_mse.mse_descr.w
  mse my_id_mse;  /* align local_mark_stack explicitly */
  mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
                /* Note: local_mark_stack is quite big (up to 128 KiB). */

  GC_ASSERT(GC_parallel);
  while (GC_mark_no < my_mark_no
         || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
    GC_wait_marker();
  }
  my_id = GC_helper_count;
  if (GC_mark_no != my_mark_no || my_id > (unsigned)GC_markers_m1) {
    /* Second test is useful only if original threads can also  */
    /* act as helpers.  Under Linux they can't.                 */
    return;
  }
  GC_helper_count = (unsigned)my_id + 1;
  GC_mark_local(local_mark_stack, (int)my_id);
  /* GC_mark_local decrements GC_helper_count. */
# undef my_id
}

#endif /* PARALLEL_MARK */

GC_INNER void GC_scratch_recycle_inner(void *ptr, size_t bytes)
{
  if (ptr != NULL) {
    size_t page_offset = (word)ptr & (GC_page_size - 1);
    size_t displ = 0;
    size_t recycled_bytes;

    GC_ASSERT(bytes != 0);
    GC_ASSERT(GC_page_size != 0);
    /* TODO: Assert correct memory flags if GWW_VDB. */
    if (page_offset != 0)
      displ = GC_page_size - page_offset;
    recycled_bytes = bytes > displ ? (bytes - displ) & ~(GC_page_size-1) : 0;
    GC_COND_LOG_PRINTF("Recycle %lu/%lu scratch-allocated bytes at %p\n",
                       (unsigned long)recycled_bytes, (unsigned long)bytes,
                       ptr);
    if (recycled_bytes > 0)
      GC_add_to_heap((struct hblk *)((word)ptr + displ), recycled_bytes);
  }
}
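/* Worked example (assuming a 4096-byte page size): for ptr at page    */
/* offset 100 with bytes = 10000, displ = 4096 - 100 = 3996 and        */
/* recycled_bytes = (10000 - 3996) & ~4095 = 4096, i.e. exactly the    */
/* one whole page lying in the interior of the range is recycled.      */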

/* Allocate or reallocate space for mark stack of size n entries.  */
/* May silently fail.                                               */
static void alloc_mark_stack(size_t n)
{
  mse * new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
# ifdef GWW_VDB
    /* Don't recycle a stack segment obtained with the wrong flags.  */
    /* Win32 GetWriteWatch requires the right kind of memory.        */
    static GC_bool GC_incremental_at_stack_alloc = FALSE;
    GC_bool recycle_old = !GC_auto_incremental
                          || GC_incremental_at_stack_alloc;

    GC_incremental_at_stack_alloc = GC_auto_incremental;
# else
#   define recycle_old TRUE
# endif

  GC_mark_stack_too_small = FALSE;
  if (GC_mark_stack != NULL) {
    if (new_stack != 0) {
      if (recycle_old) {
        /* Recycle old space.  */
        GC_scratch_recycle_inner(GC_mark_stack,
                        GC_mark_stack_size * sizeof(struct GC_ms_entry));
      }
      GC_mark_stack = new_stack;
      GC_mark_stack_size = n;
      /* FIXME: Do we need some way to reset GC_mark_stack_size? */
      GC_mark_stack_limit = new_stack + n;
      GC_COND_LOG_PRINTF("Grew mark stack to %lu frames\n",
                         (unsigned long)GC_mark_stack_size);
    } else {
      WARN("Failed to grow mark stack to %" WARN_PRIdPTR " frames\n", n);
    }
  } else if (NULL == new_stack) {
    GC_err_printf("No space for mark stack\n");
    EXIT();
  } else {
    GC_mark_stack = new_stack;
    GC_mark_stack_size = n;
    GC_mark_stack_limit = new_stack + n;
  }
  GC_mark_stack_top = GC_mark_stack - 1;
}

GC_INNER void GC_mark_init(void)
{
  alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
}

/*
 * Push all locations between b and t onto the mark stack.
 * b is the first location to be checked.  t is one past the last
 * location to be checked.
 * Should only be used if there is no possibility of mark stack
 * overflow.
 */
GC_API void GC_CALL GC_push_all(void *bottom, void *top)
{
  word length;

  bottom = (void *)(((word)bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
  top = (void *)((word)top & ~(ALIGNMENT-1));
  if ((word)bottom >= (word)top) return;

  GC_mark_stack_top++;
  if ((word)GC_mark_stack_top >= (word)GC_mark_stack_limit) {
    ABORT("Unexpected mark stack overflow");
  }
  length = (word)top - (word)bottom;
# if GC_DS_TAGS > ALIGNMENT - 1
    length += GC_DS_TAGS;
    length &= ~GC_DS_TAGS;
# endif
  GC_mark_stack_top -> mse_start = (ptr_t)bottom;
  GC_mark_stack_top -> mse_descr.w = length;
}
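/* A hypothetical usage sketch (client code, not part of this file):   */
/* a client keeping pointers in a buffer unknown to the collector      */
/* could push it from a custom push-other-roots callback:              */
/*                                                                     */
/*   static char my_buf[1024];      (hypothetical root region)         */
/*   static void my_push_other_roots(void)                             */
/*   {                                                                 */
/*     GC_push_all(my_buf, my_buf + sizeof(my_buf));                   */
/*   }                                                                 */
/*   ... GC_set_push_other_roots(my_push_other_roots); ...             */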

#ifndef GC_DISABLE_INCREMENTAL

  /* Analogous to the above, but push only those pages h with          */
  /* dirty_fn(h) != 0.  We use GC_push_all to actually push the block. */
  /* Used either to push just the dirty pages, or to push a block in   */
  /* piecemeal fashion, to allow for more marking concurrency.         */
  /* Will not overflow mark stack if GC_push_all pushes a small fixed  */
  /* number of entries.  (This is invoked only if GC_push_all pushes   */
  /* a single entry, or if it marks each object before pushing it,     */
  /* thus ensuring progress in the event of a stack overflow.)         */
  STATIC void GC_push_selected(ptr_t bottom, ptr_t top,
                               GC_bool (*dirty_fn)(struct hblk *))
  {
    struct hblk * h;

    bottom = (ptr_t)(((word)bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (ptr_t)(((word)top) & ~(ALIGNMENT-1));
    if ((word)bottom >= (word)top) return;

    h = HBLKPTR(bottom + HBLKSIZE);
    if ((word)top <= (word)h) {
      if ((*dirty_fn)(h-1)) {
        GC_push_all(bottom, top);
      }
      return;
    }
    if ((*dirty_fn)(h-1)) {
      if ((word)(GC_mark_stack_top - GC_mark_stack)
          > 3 * GC_mark_stack_size / 4) {
        GC_push_all(bottom, top);
        return;
      }
      GC_push_all(bottom, h);
    }

    while ((word)(h+1) <= (word)top) {
      if ((*dirty_fn)(h)) {
        if ((word)(GC_mark_stack_top - GC_mark_stack)
            > 3 * GC_mark_stack_size / 4) {
          /* Danger of mark stack overflow.  */
          GC_push_all(h, top);
          return;
        } else {
          GC_push_all(h, h + 1);
        }
      }
      h++;
    }

    if ((ptr_t)h != top && (*dirty_fn)(h)) {
      GC_push_all(h, top);
    }
  }

  GC_API void GC_CALL GC_push_conditional(void *bottom, void *top, int all)
  {
    if (!all) {
      GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_dirty);
    } else {
#     ifdef PROC_VDB
        if (GC_auto_incremental) {
          /* Pages that were never dirtied cannot contain pointers.  */
          GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_ever_dirty);
        } else
#     endif
      /* else */ {
        GC_push_all(bottom, top);
      }
    }
  }
#else
  GC_API void GC_CALL GC_push_conditional(void *bottom, void *top,
                                          int all GC_ATTR_UNUSED)
  {
    GC_push_all(bottom, top);
  }
#endif /* GC_DISABLE_INCREMENTAL */

#if defined(MSWIN32) || defined(MSWINCE)
  void __cdecl GC_push_one(word p)
#else
  void GC_push_one(word p)
#endif
{
  GC_PUSH_ONE_STACK(p, MARKED_FROM_REGISTER);
}

GC_API struct GC_ms_entry * GC_CALL GC_mark_and_push(void *obj,
                                                     mse *mark_stack_ptr,
                                                     mse *mark_stack_limit,
                                                     void **src GC_ATTR_UNUSED)
{
  hdr * hhdr;

  PREFETCH(obj);
  GET_HDR(obj, hhdr);
  if ((EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)
       && (!GC_all_interior_pointers
           || NULL == (hhdr = GC_find_header((ptr_t)GC_base(obj)))))
      || EXPECT(HBLK_IS_FREE(hhdr), FALSE)) {
    GC_ADD_TO_BLACK_LIST_NORMAL(obj, (ptr_t)src);
    return mark_stack_ptr;
  }
  return GC_push_contents_hdr((ptr_t)obj, mark_stack_ptr, mark_stack_limit,
                              (ptr_t)src, hhdr, TRUE);
}
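/* A hypothetical sketch (client code): a GC_DS_PROC mark procedure    */
/* could forward the two pointer fields of its objects through this    */
/* routine, e.g.:                                                      */
/*                                                                     */
/*   mse *my_mark_proc(word *addr, mse *msp, mse *msl, word env)       */
/*   {                                                                 */
/*     msp = GC_mark_and_push((void *)addr[0], msp, msl,               */
/*                            (void **)addr);                          */
/*     return GC_mark_and_push((void *)addr[1], msp, msl,              */
/*                             (void **)(addr + 1));                   */
/*   }                                                                 */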

/* Mark and push (i.e. gray) a single object p onto the main           */
/* mark stack.  Consider p to be valid if it is an interior            */
/* pointer.                                                            */
/* The object p has passed a preliminary pointer validity              */
/* test, but we do not definitely know whether it is valid.            */
/* Mark bits are NOT atomically updated.  Thus this must be the        */
/* only thread setting them.                                           */
# if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
    GC_INNER void GC_mark_and_push_stack(ptr_t p, ptr_t source)
# else
    GC_INNER void GC_mark_and_push_stack(ptr_t p)
#   define source ((ptr_t)0)
# endif
{
  hdr * hhdr;
  ptr_t r = p;

  PREFETCH(p);
  GET_HDR(p, hhdr);
  if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)
      && (NULL == hhdr
          || (r = (ptr_t)GC_base(p)) == NULL
          || (hhdr = HDR(r)) == NULL)) {
    GC_ADD_TO_BLACK_LIST_STACK(p, source);
    return;
  }
  if (EXPECT(HBLK_IS_FREE(hhdr), FALSE)) {
    GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
    return;
  }
# ifdef THREADS
    /* Pointer is on the stack.  We may have dirtied the object        */
    /* it points to, but have not called GC_dirty yet.                 */
    GC_dirty(p); /* entire object */
# endif
  GC_mark_stack_top = GC_push_contents_hdr(r, GC_mark_stack_top,
                                           GC_mark_stack_limit,
                                           source, hhdr, FALSE);
  /* We silently ignore pointers to near the end of a block,           */
  /* which is very mildly suboptimal.                                  */
  /* FIXME: We should probably add a header word to address this.      */
}
# undef source

#ifdef TRACE_BUF

# ifndef TRACE_ENTRIES
#   define TRACE_ENTRIES 1000
# endif

struct trace_entry {
  char * kind;
  word gc_no;
  word bytes_allocd;
  word arg1;
  word arg2;
} GC_trace_buf[TRACE_ENTRIES] = { { NULL, 0, 0, 0, 0 } };

int GC_trace_buf_ptr = 0;

void GC_add_trace_entry(char *kind, word arg1, word arg2)
{
  GC_trace_buf[GC_trace_buf_ptr].kind = kind;
  GC_trace_buf[GC_trace_buf_ptr].gc_no = GC_gc_no;
  GC_trace_buf[GC_trace_buf_ptr].bytes_allocd = GC_bytes_allocd;
  GC_trace_buf[GC_trace_buf_ptr].arg1 = arg1 ^ 0x80000000;
  GC_trace_buf[GC_trace_buf_ptr].arg2 = arg2 ^ 0x80000000;
  GC_trace_buf_ptr++;
  if (GC_trace_buf_ptr >= TRACE_ENTRIES) GC_trace_buf_ptr = 0;
}
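/* Note: the top-bit XOR above is undone by GC_print_trace_inner       */
/* below; it apparently keeps a logged zero argument distinguishable   */
/* from a never-written (all-zero) buffer slot.                        */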

GC_API void GC_CALL GC_print_trace_inner(word gc_no)
{
  int i;

  for (i = GC_trace_buf_ptr-1; i != GC_trace_buf_ptr; i--) {
    struct trace_entry *p;

    if (i < 0) i = TRACE_ENTRIES-1;
    p = GC_trace_buf + i;
    if (p -> gc_no < gc_no || p -> kind == 0) {
      return;
    }
    GC_printf("Trace:%s (gc:%u, bytes:%lu) 0x%lX, 0x%lX\n",
              p -> kind, (unsigned)p -> gc_no,
              (unsigned long)p -> bytes_allocd,
              (long)p->arg1 ^ 0x80000000L, (long)p->arg2 ^ 0x80000000L);
  }
  GC_printf("Trace incomplete\n");
}

GC_API void GC_CALL GC_print_trace(word gc_no)
{
  DCL_LOCK_STATE;

  LOCK();
  GC_print_trace_inner(gc_no);
  UNLOCK();
}

#endif /* TRACE_BUF */

/*
 * A version of GC_push_all that treats all interior pointers as valid
 * and scans the entire region immediately, in case the contents
 * change.
 */
GC_ATTR_NO_SANITIZE_ADDR GC_ATTR_NO_SANITIZE_MEMORY GC_ATTR_NO_SANITIZE_THREAD
GC_API void GC_CALL GC_push_all_eager(void *bottom, void *top)
{
  word * b = (word *)(((word)bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
  word * t = (word *)(((word)top) & ~(ALIGNMENT-1));
  REGISTER word *p;
  REGISTER word *lim;
  REGISTER ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  REGISTER ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
# define GC_greatest_plausible_heap_addr greatest_ha
# define GC_least_plausible_heap_addr least_ha

  if (top == 0) return;

  /* Check all pointers in range and push if they appear to be valid. */
  lim = t - 1 /* longword */;
  for (p = b; (word)p <= (word)lim;
       p = (word *)(((ptr_t)p) + ALIGNMENT)) {
    REGISTER word q = *p;

    GC_PUSH_ONE_STACK(q, p);
  }
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
}

GC_INNER void GC_push_all_stack(ptr_t bottom, ptr_t top)
{
# ifndef NEED_FIXUP_POINTER
    if (GC_all_interior_pointers
#       if defined(THREADS) && defined(MPROTECT_VDB)
          && !GC_auto_incremental
#       endif
        && (word)GC_mark_stack_top
           < (word)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/8)) {
      GC_push_all(bottom, top);
    } else
# endif
    /* else */ {
      GC_push_all_eager(bottom, top);
    }
}
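
/* Illustrative sketch, not part of the collector: the headroom test in    */
/* GC_push_all_stack above.  Deferred scanning (GC_push_all) is chosen     */
/* only while the mark stack retains a reserve of free entries; otherwise  */
/* the region is scanned eagerly.  All names below are hypothetical        */
/* stand-ins; the real test compares mark-stack entry pointers.            */
#if 0 /* standalone example; not compiled with the collector */
#include <assert.h>

int main(void)
{
  unsigned long top = 900, limit = 1024; /* current and limit entry indices */
  unsigned long reserve = 1024 / 8;      /* stand-in for INITIAL_MARK_STACK_SIZE/8 */
  int can_defer = top < limit - reserve; /* 900 < 896 -> false */

  assert(!can_defer); /* too full: fall back to the eager path */
  return 0;
}
#endif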

#if defined(WRAP_MARK_SOME) && defined(PARALLEL_MARK)
  /* Similar to GC_push_conditional but scans the whole region immediately. */
  GC_ATTR_NO_SANITIZE_ADDR GC_ATTR_NO_SANITIZE_MEMORY
  GC_ATTR_NO_SANITIZE_THREAD
  GC_INNER void GC_push_conditional_eager(void *bottom, void *top,
                                          GC_bool all)
  {
    word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
    REGISTER word *p;
    REGISTER word *lim;
    REGISTER ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    REGISTER ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    if (top == NULL)
      return;
    (void)all; /* TODO: If !all then scan only dirty pages. */

    lim = t - 1;
    for (p = b; (word)p <= (word)lim; p = (word *)((ptr_t)p + ALIGNMENT)) {
      REGISTER word q = *p;

      GC_PUSH_ONE_HEAP(q, p, GC_mark_stack_top);
    }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
  }
#endif /* WRAP_MARK_SOME && PARALLEL_MARK */
#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
    defined(MARK_BIT_PER_GRANULE)
# if GC_GRANULE_WORDS == 1
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      do { \
        word qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
      } while (0)
# elif GC_GRANULE_WORDS == 2
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      do { \
        word qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
        qcontents = (q)[1]; \
        GC_PUSH_ONE_HEAP(qcontents, (q)+1, GC_mark_stack_top); \
      } while (0)
# elif GC_GRANULE_WORDS == 4
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      do { \
        word qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
        qcontents = (q)[1]; \
        GC_PUSH_ONE_HEAP(qcontents, (q)+1, GC_mark_stack_top); \
        qcontents = (q)[2]; \
        GC_PUSH_ONE_HEAP(qcontents, (q)+2, GC_mark_stack_top); \
        qcontents = (q)[3]; \
        GC_PUSH_ONE_HEAP(qcontents, (q)+3, GC_mark_stack_top); \
      } while (0)
# endif
#endif /* !SMALL_CONFIG && !USE_MARK_BYTES && MARK_BIT_PER_GRANULE */
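
/* Illustrative sketch, not part of the collector: how bits of a mark word */
/* select granules within a block.  Bit i covers the granule that starts   */
/* at word offset i * GC_GRANULE_WORDS; PUSH_GRANULE above just unrolls    */
/* the per-granule word pushes for the configured granule size.  Names     */
/* below are hypothetical stand-ins.                                       */
#if 0 /* standalone example; not compiled with the collector */
#include <stdio.h>

int main(void)
{
  unsigned long mark_word = 0x5; /* bits 0 and 2 set: granules 0 and 2 marked */
  unsigned granule_words = 2;    /* stand-in for GC_GRANULE_WORDS */
  unsigned word_index = 0;

  while (mark_word != 0) {       /* same loop shape as GC_push_marked1 below */
    if (mark_word & 1)
      printf("scan words [%u, %u)\n", word_index, word_index + granule_words);
    word_index += granule_words;
    mark_word >>= 1;
  }
  return 0; /* prints [0, 2) and [4, 6) */
}
#endif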

#ifdef USE_PUSH_MARKED_ACCELERATORS
/* Push all objects reachable from marked objects in the given block */
/* containing objects of size 1 granule.                             */
STATIC void GC_push_marked1(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;

    /* Allow registers to be used for some frequently accessed  */
    /* global variables.  Otherwise aliasing issues are likely  */
    /* to prevent that.                                         */
    ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* Go through all words in block. */
    while ((word)p < (word)plim) {
        word mark_word = *mark_word_addr++;
        word *q = p;

        while (mark_word != 0) {
          if (mark_word & 1) {
            PUSH_GRANULE(q);
          }
          q += GC_GRANULE_WORDS;
          mark_word >>= 1;
        }
        p += WORDSZ*GC_GRANULE_WORDS;
    }

#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_limit GC_arrays._mark_stack_limit
#   define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}
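
/* Illustrative sketch, not part of the collector: the #undef/#define      */
/* shadowing idiom used above.  The hot loop operates on local (ideally    */
/* register-resident) copies of globals under the global names, and the    */
/* result is stored back once.  "g_total" and "hot_sum" are hypothetical.  */
#if 0 /* standalone example; not compiled with the collector */
static long g_total; /* stand-in for a frequently accessed global */

static void hot_sum(const long *p, int n)
{
  long total = g_total;   /* load the global once */
  int i;

# define g_total total    /* references below now hit the local copy */
  for (i = 0; i < n; i++)
    g_total += p[i];      /* expands to: total += p[i]; */
# undef g_total
  g_total = total;        /* single store back to the real global */
}
#endif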

#ifndef UNALIGNED_PTRS

/* Push all objects reachable from marked objects in the given block */
/* of size 2 (granules) objects.                                     */
STATIC void GC_push_marked2(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;

    ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* Go through all words in block. */
    while ((word)p < (word)plim) {
        word mark_word = *mark_word_addr++;
        word *q = p;

        while (mark_word != 0) {
          if (mark_word & 1) {
            PUSH_GRANULE(q);
            PUSH_GRANULE(q + GC_GRANULE_WORDS);
          }
          q += 2 * GC_GRANULE_WORDS;
          mark_word >>= 2;
        }
        p += WORDSZ*GC_GRANULE_WORDS;
    }

#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_limit GC_arrays._mark_stack_limit
#   define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}

# if GC_GRANULE_WORDS < 4
/* Push all objects reachable from marked objects in the given block */
/* of size 4 (granules) objects.                                     */
/* There is a risk of mark stack overflow here.  But we handle that. */
/* And only unmarked objects get pushed, so it's not very likely.    */
STATIC void GC_push_marked4(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;

    ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
    ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* Go through all words in block. */
    while ((word)p < (word)plim) {
        word mark_word = *mark_word_addr++;
        word *q = p;

        while (mark_word != 0) {
          if (mark_word & 1) {
            PUSH_GRANULE(q);
            PUSH_GRANULE(q + GC_GRANULE_WORDS);
            PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
            PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
          }
          q += 4 * GC_GRANULE_WORDS;
          mark_word >>= 4;
        }
        p += WORDSZ*GC_GRANULE_WORDS;
    }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
#   define GC_mark_stack_limit GC_arrays._mark_stack_limit
#   define GC_mark_stack_top GC_arrays._mark_stack_top
    GC_mark_stack_top = mark_stack_top;
}

# endif /* GC_GRANULE_WORDS < 4 */

#endif /* UNALIGNED_PTRS */

#endif /* USE_PUSH_MARKED_ACCELERATORS */

/* Push all objects reachable from marked objects in the given block. */
STATIC void GC_push_marked(struct hblk *h, hdr *hhdr)
{
    word sz = hhdr -> hb_sz;
    word descr = hhdr -> hb_descr;
    ptr_t p;
    word bit_no;
    ptr_t lim;
    mse * GC_mark_stack_top_reg;
    mse * mark_stack_limit = GC_mark_stack_limit;

    /* Some quick shortcuts: */
    if ((/* 0 | */ GC_DS_LENGTH) == descr) return;
    if (GC_block_empty(hhdr) /* nothing marked */) return;
#   if !defined(GC_DISABLE_INCREMENTAL)
      GC_n_rescuing_pages++;
#   endif
    GC_objects_are_marked = TRUE;
    if (sz > MAXOBJBYTES) {
        lim = h -> hb_body;
    } else {
        lim = (ptr_t)((word)(h + 1)->hb_body - sz);
    }

    switch (BYTES_TO_GRANULES(sz)) {
#   if defined(USE_PUSH_MARKED_ACCELERATORS)
     case 1:
       GC_push_marked1(h, hhdr);
       break;
#    if !defined(UNALIGNED_PTRS)
       case 2:
         GC_push_marked2(h, hhdr);
         break;
#      if GC_GRANULE_WORDS < 4
       case 4:
         GC_push_marked4(h, hhdr);
         break;
#      endif
#    endif
#   else
     case 1: /* to suppress "switch statement contains no case" warning */
#   endif
     default:
      GC_mark_stack_top_reg = GC_mark_stack_top;
      for (p = h -> hb_body, bit_no = 0; (word)p <= (word)lim;
           p += sz, bit_no += MARK_BIT_OFFSET(sz)) {
        if (mark_bit_from_hdr(hhdr, bit_no)) {
          /* Mark from fields inside the object. */
          GC_mark_stack_top_reg = GC_push_obj(p, hhdr, GC_mark_stack_top_reg,
                                              mark_stack_limit);
        }
      }
      GC_mark_stack_top = GC_mark_stack_top_reg;
    }
}
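
/* Illustrative sketch, not part of the collector: the "lim" computation   */
/* in GC_push_marked above.  For a block of sz-byte objects, the last      */
/* object starts sz bytes before the block end, so the scan may continue   */
/* while p <= lim.  The 4 KB block size and 48-byte objects below are      */
/* hypothetical.                                                           */
#if 0 /* standalone example; not compiled with the collector */
#include <assert.h>

int main(void)
{
  unsigned long block = 0x10000, hblksize = 4096, sz = 48;
  unsigned long lim = block + hblksize - sz; /* start of the last object */
  unsigned long p, count = 0;

  for (p = block; p <= lim; p += sz) count++;
  assert(count == hblksize / sz); /* 85 objects; the 16-byte tail is unused */
  return 0;
}
#endif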

#ifdef ENABLE_DISCLAIM
/* Unconditionally mark from all objects which have not been reclaimed. */
/* This is useful in order to retain pointers which are reachable from  */
/* the disclaim notifiers.                                              */
/*                                                                      */
/* To determine whether an object has been reclaimed, we require that   */
/* any live object has a non-zero value in one of the two lowest bits   */
/* of its first word.  On the other hand, a reclaimed object is a       */
/* member of a free list, and thus contains a word-aligned next pointer */
/* as its first word.                                                   */
STATIC void GC_push_unconditionally(struct hblk *h, hdr *hhdr)
{
    word sz = hhdr -> hb_sz;
    word descr = hhdr -> hb_descr;
    ptr_t p;
    ptr_t lim;
    mse * GC_mark_stack_top_reg;
    mse * mark_stack_limit = GC_mark_stack_limit;

    if ((/* 0 | */ GC_DS_LENGTH) == descr)
        return;

#   if !defined(GC_DISABLE_INCREMENTAL)
      GC_n_rescuing_pages++;
#   endif
    GC_objects_are_marked = TRUE;
    if (sz > MAXOBJBYTES)
        lim = h -> hb_body;
    else
        lim = (ptr_t)((word)(h + 1)->hb_body - sz);

    GC_mark_stack_top_reg = GC_mark_stack_top;
    for (p = h -> hb_body; (word)p <= (word)lim; p += sz)
        if ((*(word *)p & 0x3) != 0)
            GC_mark_stack_top_reg = GC_push_obj(p, hhdr, GC_mark_stack_top_reg,
                                                mark_stack_limit);
    GC_mark_stack_top = GC_mark_stack_top_reg;
}
#endif /* ENABLE_DISCLAIM */
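
/* Illustrative sketch, not part of the collector: the low-two-bits test   */
/* above.  A reclaimed object begins with a word-aligned free-list link,   */
/* so the two lowest bits of its first word are zero; a live object is     */
/* required to keep at least one of them set.  The values below are        */
/* hypothetical.                                                           */
#if 0 /* standalone example; not compiled with the collector */
#include <assert.h>
#include <stdint.h>

int main(void)
{
  uintptr_t freed_first_word = 0x10008; /* word-aligned next pointer */
  uintptr_t live_first_word = 0x10001;  /* low bit deliberately set  */

  assert((freed_first_word & 0x3) == 0); /* reclaimed: skipped by the scan */
  assert((live_first_word & 0x3) != 0);  /* live: pushed by the scan */
  return 0;
}
#endif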

#ifndef GC_DISABLE_INCREMENTAL
/* Test whether any page in the given block is dirty. */
STATIC GC_bool GC_block_was_dirty(struct hblk *h, hdr *hhdr)
{
    word sz = hhdr -> hb_sz;

    if (sz <= MAXOBJBYTES) {
        return(GC_page_was_dirty(h));
    } else {
        ptr_t p = (ptr_t)h;

        while ((word)p < (word)h + sz) {
            if (GC_page_was_dirty((struct hblk *)p)) return(TRUE);
            p += HBLKSIZE;
        }
        return(FALSE);
    }
}
#endif /* !GC_DISABLE_INCREMENTAL */

/* Similar to GC_push_marked, but skip over unallocated blocks */
/* and return address of next plausible block.                 */
STATIC struct hblk * GC_push_next_marked(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr), FALSE)) {
      h = GC_next_block(h, FALSE);
      if (NULL == h) return NULL;
      hhdr = GC_find_header((ptr_t)h);
    } else {
#     ifdef LINT2
        if (NULL == h) ABORT("Bad HDR() definition");
#     endif
    }
    GC_push_marked(h, hhdr);
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}

#ifndef GC_DISABLE_INCREMENTAL
  /* Identical to above, but mark only from dirty pages. */
  STATIC struct hblk * GC_push_next_marked_dirty(struct hblk *h)
  {
    hdr * hhdr = HDR(h);

    if (!GC_incremental) ABORT("Dirty bits not set up");
    for (;;) {
        if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr)
                   || HBLK_IS_FREE(hhdr), FALSE)) {
          h = GC_next_block(h, FALSE);
          if (NULL == h) return NULL;
          hhdr = GC_find_header((ptr_t)h);
        } else {
#         ifdef LINT2
            if (NULL == h) ABORT("Bad HDR() definition");
#         endif
        }
        if (GC_block_was_dirty(h, hhdr))
          break;
        h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
        hhdr = HDR(h);
    }
#   ifdef ENABLE_DISCLAIM
      if ((hhdr -> hb_flags & MARK_UNCONDITIONALLY) != 0) {
        GC_push_unconditionally(h, hhdr);

        /* One may ask why we do not also handle MARK_UNCONDITIONALLY  */
        /* in GC_push_next_marked, which is also applied to            */
        /* uncollectible blocks.  That function does not need to scan  */
        /* uncollectible (and unconditionally marked) blocks since     */
        /* those are already handled in the MS_PUSH_UNCOLLECTABLE      */
        /* phase.                                                      */
      } else
#   endif
    /* else */ {
      GC_push_marked(h, hhdr);
    }
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
  }
#endif /* !GC_DISABLE_INCREMENTAL */

/* Similar to above, but for uncollectible pages.  Needed since we */
/* do not clear marks for such pages, even for full collections.   */
STATIC struct hblk * GC_push_next_marked_uncollectable(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    for (;;) {
        if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr)
                   || HBLK_IS_FREE(hhdr), FALSE)) {
          h = GC_next_block(h, FALSE);
          if (NULL == h) return NULL;
          hhdr = GC_find_header((ptr_t)h);
        } else {
#         ifdef LINT2
            if (NULL == h) ABORT("Bad HDR() definition");
#         endif
        }
        if (hhdr -> hb_obj_kind == UNCOLLECTABLE) {
            GC_push_marked(h, hhdr);
            break;
        }
#       ifdef ENABLE_DISCLAIM
          if ((hhdr -> hb_flags & MARK_UNCONDITIONALLY) != 0) {
            GC_push_unconditionally(h, hhdr);
            break;
          }
#       endif
        h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
        hhdr = HDR(h);
    }
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}
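
/* Illustrative sketch, not part of the collector: the block stepping used */
/* by the three iterators above.  OBJ_SZ_TO_BLOCKS rounds an object size   */
/* up to whole heap blocks, so a large object advances the scan past every */
/* block it occupies.  The sizes below are hypothetical.                   */
#if 0 /* standalone example; not compiled with the collector */
#include <assert.h>

int main(void)
{
  unsigned long hblksize = 4096; /* stand-in for HBLKSIZE */
  unsigned long sz = 10000;      /* a large object spanning several blocks */
  unsigned long blocks = (sz + hblksize - 1) / hblksize; /* round up */

  assert(blocks == 3); /* the iterator skips ahead by three blocks */
  return 0;
}
#endif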